]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Update PFIRST, PNEXT for pred_desc
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
9ee3a611
RH
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
38cadeba
RH
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
757f9cff
RH
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
c4e7c493 46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
ccd841c3
RH
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
451e4ffd 57static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
62
451e4ffd 63static int tszimm_shr(DisasContext *s, int x)
ccd841c3 64{
451e4ffd 65 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
66}
67
68/* See e.g. LSL (immediate, predicated). */
451e4ffd 69static int tszimm_shl(DisasContext *s, int x)
ccd841c3 70{
451e4ffd 71 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
72}
73
451e4ffd 74static inline int plus1(DisasContext *s, int x)
24e82e68
RH
75{
76 return x + 1;
77}
78
f25a2361 79/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 80static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
81{
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83}
84
451e4ffd 85static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
86{
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88}
89
c4e7c493
RH
90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
451e4ffd 93static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
94{
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97}
98
38388f7e
RH
99/*
100 * Include the generated decoder.
101 */
102
139c1837 103#include "decode-sve.c.inc"
38388f7e
RH
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
d1822297
RH
109/* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
122
516e246a
RH
/*
 * Round a register size up to one accepted by the tcg vector
 * infrastructure: 8 bytes, or a multiple of 16 bytes.
 *
 * Any operation which uses the rounded size may assume that the bits
 * above pred_full_reg_size are zero, and must leave them that way.
 *
 * The vector registers do not need this, as they are always properly
 * sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    return size <= 8 ? 8 : QEMU_ALIGN_UP(size, 16);
}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
144
40e32e5a
RH
145/* Invoke an out-of-line helper on 2 Zregs. */
146static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
148{
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
153}
154
e645d1a1
RH
155/* Invoke an out-of-line helper on 3 Zregs. */
156static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
158{
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
164}
165
96a461f7
RH
166/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
169{
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
175}
176
36cbb7a8
RH
177/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
180{
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
187}
f7d79c41 188
36cbb7a8 189/* Invoke a vector expander on two Zregs. */
f7d79c41
RH
190static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
38388f7e 192{
f7d79c41
RH
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
38388f7e
RH
196}
197
39eea561 198/* Invoke a vector expander on three Zregs. */
28c4da31
RH
199static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
38388f7e 201{
28c4da31
RH
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
38388f7e
RH
206}
207
39eea561
RH
208/* Invoke a vector move on two Zregs. */
209static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 210{
f7d79c41
RH
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
213 }
214 return true;
38388f7e
RH
215}
216
d9d78dcc
RH
217/* Initialize a Zreg with replications of a 64-bit immediate. */
218static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
219{
220 unsigned vsz = vec_full_reg_size(s);
8711e71f 221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
222}
223
516e246a 224/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
225static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
516e246a 227{
dd81a8d7
RH
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
232}
233
234/* Invoke a vector move on two Pregs. */
235static bool do_mov_p(DisasContext *s, int rd, int rn)
236{
d0b2df5a
RH
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
241 }
242 return true;
516e246a
RH
243}
244
9e18d7a6
RH
245/* Set the cpu flags as per a return from an SVE helper. */
246static void do_pred_flags(TCGv_i32 t)
247{
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
252}
253
254/* Subroutines computing the ARM PredTest psuedofunction. */
255static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
256{
257 TCGv_i32 t = tcg_temp_new_i32();
258
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
262}
263
264static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
265{
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
269
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
273
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
277
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
280}
281
028e2a7b
RH
/*
 * For each element size, the bits within a predicate word that are
 * active: every bit, every 2nd bit, every 4th bit, every 8th bit.
 */
const uint64_t pred_esz_masks[4] = {
    [0] = 0xffffffffffffffffull,
    [1] = 0x5555555555555555ull,
    [2] = 0x1111111111111111ull,
    [3] = 0x0101010101010101ull,
};
287
39eea561
RH
288/*
289 *** SVE Logical - Unpredicated Group
290 */
291
28c4da31
RH
292static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
293{
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
296 }
297 return true;
298}
299
3a7be554 300static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 301{
28c4da31 302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
39eea561
RH
303}
304
3a7be554 305static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 306{
28c4da31 307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
39eea561
RH
308}
309
3a7be554 310static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 311{
28c4da31 312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
39eea561
RH
313}
314
3a7be554 315static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 316{
28c4da31 317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 318}
d1822297 319
fea98f9c
RH
320/*
321 *** SVE Integer Arithmetic - Unpredicated Group
322 */
323
3a7be554 324static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 325{
28c4da31 326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
fea98f9c
RH
327}
328
3a7be554 329static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 330{
28c4da31 331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
fea98f9c
RH
332}
333
3a7be554 334static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 335{
28c4da31 336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
fea98f9c
RH
337}
338
3a7be554 339static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 340{
28c4da31 341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
fea98f9c
RH
342}
343
3a7be554 344static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 345{
28c4da31 346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
fea98f9c
RH
347}
348
3a7be554 349static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 350{
28c4da31 351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
fea98f9c
RH
352}
353
f97cfd59
RH
354/*
355 *** SVE Integer Arithmetic - Binary Predicated Group
356 */
357
358static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
359{
f97cfd59
RH
360 if (fn == NULL) {
361 return false;
362 }
363 if (sve_access_check(s)) {
36cbb7a8 364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
f97cfd59
RH
365 }
366 return true;
367}
368
a2103582
RH
369/* Select active elememnts from Zn and inactive elements from Zm,
370 * storing the result in Zd.
371 */
372static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
373{
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
377 };
36cbb7a8 378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
379}
380
f97cfd59 381#define DO_ZPZZ(NAME, name) \
3a7be554 382static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
f97cfd59
RH
383{ \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
389}
390
391DO_ZPZZ(AND, and)
392DO_ZPZZ(EOR, eor)
393DO_ZPZZ(ORR, orr)
394DO_ZPZZ(BIC, bic)
395
396DO_ZPZZ(ADD, add)
397DO_ZPZZ(SUB, sub)
398
399DO_ZPZZ(SMAX, smax)
400DO_ZPZZ(UMAX, umax)
401DO_ZPZZ(SMIN, smin)
402DO_ZPZZ(UMIN, umin)
403DO_ZPZZ(SABD, sabd)
404DO_ZPZZ(UABD, uabd)
405
406DO_ZPZZ(MUL, mul)
407DO_ZPZZ(SMULH, smulh)
408DO_ZPZZ(UMULH, umulh)
409
27721dbb
RH
410DO_ZPZZ(ASR, asr)
411DO_ZPZZ(LSR, lsr)
412DO_ZPZZ(LSL, lsl)
413
3a7be554 414static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
415{
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
418 };
419 return do_zpzz_ool(s, a, fns[a->esz]);
420}
421
3a7be554 422static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
423{
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
426 };
427 return do_zpzz_ool(s, a, fns[a->esz]);
428}
429
3a7be554 430static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
431{
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
434 }
435 return true;
436}
d3fe4a29 437
f97cfd59
RH
438#undef DO_ZPZZ
439
afac6d04
RH
440/*
441 *** SVE Integer Arithmetic - Unary Predicated Group
442 */
443
444static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
445{
446 if (fn == NULL) {
447 return false;
448 }
449 if (sve_access_check(s)) {
96a461f7 450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
afac6d04
RH
451 }
452 return true;
453}
454
455#define DO_ZPZ(NAME, name) \
3a7be554 456static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
afac6d04
RH
457{ \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
463}
464
465DO_ZPZ(CLS, cls)
466DO_ZPZ(CLZ, clz)
467DO_ZPZ(CNT_zpz, cnt_zpz)
468DO_ZPZ(CNOT, cnot)
469DO_ZPZ(NOT_zpz, not_zpz)
470DO_ZPZ(ABS, abs)
471DO_ZPZ(NEG, neg)
472
3a7be554 473static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
474{
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
480 };
481 return do_zpz_ool(s, a, fns[a->esz]);
482}
483
3a7be554 484static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
485{
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
491 };
492 return do_zpz_ool(s, a, fns[a->esz]);
493}
494
3a7be554 495static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
496{
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504}
505
3a7be554 506static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
507{
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
513 };
514 return do_zpz_ool(s, a, fns[a->esz]);
515}
516
3a7be554 517static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
518{
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
523 };
524 return do_zpz_ool(s, a, fns[a->esz]);
525}
526
3a7be554 527static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
528{
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
533 };
534 return do_zpz_ool(s, a, fns[a->esz]);
535}
536
3a7be554 537static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
538{
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
540}
541
3a7be554 542static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
543{
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
545}
546
547#undef DO_ZPZ
548
047cec97
RH
549/*
550 *** SVE Integer Reduction Group
551 */
552
553typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
556{
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
561
562 if (fn == NULL) {
563 return false;
564 }
565 if (!sve_access_check(s)) {
566 return true;
567 }
568
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
573
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
580
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
584}
585
586#define DO_VPZ(NAME, name) \
3a7be554 587static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
588{ \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
594}
595
596DO_VPZ(ORV, orv)
597DO_VPZ(ANDV, andv)
598DO_VPZ(EORV, eorv)
599
600DO_VPZ(UADDV, uaddv)
601DO_VPZ(SMAXV, smaxv)
602DO_VPZ(UMAXV, umaxv)
603DO_VPZ(SMINV, sminv)
604DO_VPZ(UMINV, uminv)
605
3a7be554 606static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
607{
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
611 };
612 return do_vpz_ool(s, a, fns[a->esz]);
613}
614
615#undef DO_VPZ
616
ccd841c3
RH
617/*
618 *** SVE Shift by Immediate - Predicated Group
619 */
620
60245996
RH
621/*
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
ccd841c3 624 */
60245996
RH
625static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
ccd841c3 627{
60245996
RH
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 631 };
60245996 632
ccd841c3 633 if (sve_access_check(s)) {
96a461f7 634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
635 }
636 return true;
637}
638
639static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
641{
642 if (sve_access_check(s)) {
96a461f7 643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
ccd841c3
RH
644 }
645 return true;
646}
647
3a7be554 648static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
649{
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
653 };
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
657 }
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
662}
663
3a7be554 664static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
665{
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
669 };
670 if (a->esz < 0) {
671 return false;
672 }
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
60245996 676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
679 }
680}
681
3a7be554 682static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
683{
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
687 };
688 if (a->esz < 0) {
689 return false;
690 }
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
60245996 694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
697 }
698}
699
3a7be554 700static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
701{
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
705 };
706 if (a->esz < 0) {
707 return false;
708 }
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
60245996 712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
715 }
716}
717
fe7f8dfb
RH
718/*
719 *** SVE Bitwise Shift - Predicated Group
720 */
721
722#define DO_ZPZW(NAME, name) \
3a7be554 723static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
fe7f8dfb
RH
724{ \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
731 } \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
733}
734
735DO_ZPZW(ASR, asr)
736DO_ZPZW(LSR, lsr)
737DO_ZPZW(LSL, lsl)
738
739#undef DO_ZPZW
740
d9d78dcc
RH
741/*
742 *** SVE Bitwise Shift - Unpredicated Group
743 */
744
745static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
748{
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
752 }
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
764 }
765 }
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
768 }
769 return true;
770}
771
3a7be554 772static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
773{
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
775}
776
3a7be554 777static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
778{
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
780}
781
3a7be554 782static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
783{
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
785}
786
787static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
788{
789 if (fn == NULL) {
790 return false;
791 }
792 if (sve_access_check(s)) {
e645d1a1 793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
794 }
795 return true;
796}
797
798#define DO_ZZW(NAME, name) \
3a7be554 799static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
d9d78dcc
RH
800{ \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
806}
807
808DO_ZZW(ASR, asr)
809DO_ZZW(LSR, lsr)
810DO_ZZW(LSL, lsl)
811
812#undef DO_ZZW
813
96a36e4a
RH
814/*
815 *** SVE Integer Multiply-Add Group
816 */
817
818static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
820{
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
829 }
830 return true;
831}
832
833#define DO_ZPZZZ(NAME, name) \
3a7be554 834static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
96a36e4a
RH
835{ \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
841}
842
843DO_ZPZZZ(MLA, mla)
844DO_ZPZZZ(MLS, mls)
845
846#undef DO_ZPZZZ
847
9a56c9c3
RH
848/*
849 *** SVE Index Generation Group
850 */
851
852static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
854{
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
858
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
868 };
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
871
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
875
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
878 }
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
881}
882
3a7be554 883static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
884{
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
891 }
892 return true;
893}
894
3a7be554 895static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
896{
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
902 }
903 return true;
904}
905
3a7be554 906static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
907{
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
913 }
914 return true;
915}
916
3a7be554 917static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
918{
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
923 }
924 return true;
925}
926
96f922cc
RH
927/*
928 *** SVE Stack Allocation Group
929 */
930
3a7be554 931static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 932{
5de56742
AC
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
937 }
96f922cc
RH
938 return true;
939}
940
3a7be554 941static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 942{
5de56742
AC
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
947 }
96f922cc
RH
948 return true;
949}
950
3a7be554 951static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 952{
5de56742
AC
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
956 }
96f922cc
RH
957 return true;
958}
959
4b242d9c
RH
960/*
961 *** SVE Compute Vector Address Group
962 */
963
964static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
965{
966 if (sve_access_check(s)) {
e645d1a1 967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
4b242d9c
RH
968 }
969 return true;
970}
971
3a7be554 972static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
973{
974 return do_adr(s, a, gen_helper_sve_adr_p32);
975}
976
3a7be554 977static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
4b242d9c
RH
978{
979 return do_adr(s, a, gen_helper_sve_adr_p64);
980}
981
3a7be554 982static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
983{
984 return do_adr(s, a, gen_helper_sve_adr_s32);
985}
986
3a7be554 987static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
988{
989 return do_adr(s, a, gen_helper_sve_adr_u32);
990}
991
0762cd42
RH
992/*
993 *** SVE Integer Misc - Unpredicated Group
994 */
995
3a7be554 996static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
997{
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1003 };
1004 if (a->esz == 0) {
1005 return false;
1006 }
1007 if (sve_access_check(s)) {
40e32e5a 1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1009 }
1010 return true;
1011}
1012
3a7be554 1013static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1014{
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1020 };
1021 if (a->esz == 0) {
1022 return false;
1023 }
1024 if (sve_access_check(s)) {
e645d1a1 1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1026 }
1027 return true;
1028}
1029
516e246a
RH
1030/*
1031 *** SVE Predicate Logical Operations Group
1032 */
1033
1034static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1036{
1037 if (!sve_access_check(s)) {
1038 return true;
1039 }
1040
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1046
dd81a8d7
RH
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1050 }
1051
516e246a
RH
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1058
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1062
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1065
1066 do_predtest1(pd, pg);
1067
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1077 */
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1082 }
1083
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1086 }
1087 return true;
1088}
1089
1090static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1091{
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1094}
1095
1096static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1098{
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1101}
1102
3a7be554 1103static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1104{
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1110 };
dd81a8d7
RH
1111
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1115 }
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1121 }
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
516e246a 1126 }
516e246a 1127 }
dd81a8d7 1128 return do_pppp_flags(s, a, &op);
516e246a
RH
1129}
1130
1131static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1132{
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1135}
1136
1137static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1139{
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1142}
1143
3a7be554 1144static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1145{
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1151 };
dd81a8d7
RH
1152
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1156 }
1157 return true;
516e246a 1158 }
dd81a8d7 1159 return do_pppp_flags(s, a, &op);
516e246a
RH
1160}
1161
1162static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1163{
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1166}
1167
1168static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1170{
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1173}
1174
3a7be554 1175static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1176{
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1182 };
dd81a8d7 1183 return do_pppp_flags(s, a, &op);
516e246a
RH
1184}
1185
3a7be554 1186static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1187{
516e246a
RH
1188 if (a->s) {
1189 return false;
516e246a 1190 }
d4bc6232
RH
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1197 }
1198 return true;
516e246a
RH
1199}
1200
1201static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1202{
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1205}
1206
1207static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1209{
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1212}
1213
3a7be554 1214static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1215{
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1221 };
dd81a8d7
RH
1222
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1224 return do_mov_p(s, a->rd, a->rn);
516e246a 1225 }
dd81a8d7 1226 return do_pppp_flags(s, a, &op);
516e246a
RH
1227}
1228
1229static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1230{
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1233}
1234
1235static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1237{
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1240}
1241
3a7be554 1242static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1243{
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1249 };
dd81a8d7 1250 return do_pppp_flags(s, a, &op);
516e246a
RH
1251}
1252
1253static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1254{
1255 tcg_gen_or_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1257}
1258
1259static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1261{
1262 tcg_gen_or_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1264}
1265
3a7be554 1266static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1267{
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nor_pg_i64,
1270 .fniv = gen_nor_pg_vec,
1271 .fno = gen_helper_sve_nor_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1273 };
dd81a8d7 1274 return do_pppp_flags(s, a, &op);
516e246a
RH
1275}
1276
1277static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1278{
1279 tcg_gen_and_i64(pd, pn, pm);
1280 tcg_gen_andc_i64(pd, pg, pd);
1281}
1282
1283static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1284 TCGv_vec pm, TCGv_vec pg)
1285{
1286 tcg_gen_and_vec(vece, pd, pn, pm);
1287 tcg_gen_andc_vec(vece, pd, pg, pd);
1288}
1289
3a7be554 1290static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1291{
1292 static const GVecGen4 op = {
1293 .fni8 = gen_nand_pg_i64,
1294 .fniv = gen_nand_pg_vec,
1295 .fno = gen_helper_sve_nand_pppp,
1296 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1297 };
dd81a8d7 1298 return do_pppp_flags(s, a, &op);
516e246a
RH
1299}
1300
9e18d7a6
RH
1301/*
1302 *** SVE Predicate Misc Group
1303 */
1304
3a7be554 1305static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
9e18d7a6
RH
1306{
1307 if (sve_access_check(s)) {
1308 int nofs = pred_full_reg_offset(s, a->rn);
1309 int gofs = pred_full_reg_offset(s, a->pg);
1310 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1311
1312 if (words == 1) {
1313 TCGv_i64 pn = tcg_temp_new_i64();
1314 TCGv_i64 pg = tcg_temp_new_i64();
1315
1316 tcg_gen_ld_i64(pn, cpu_env, nofs);
1317 tcg_gen_ld_i64(pg, cpu_env, gofs);
1318 do_predtest1(pn, pg);
1319
1320 tcg_temp_free_i64(pn);
1321 tcg_temp_free_i64(pg);
1322 } else {
1323 do_predtest(s, nofs, gofs, words);
1324 }
1325 }
1326 return true;
1327}
1328
028e2a7b
RH
/*
 * Number of elements selected by a predicate-count pattern; see the
 * ARM pseudocode DecodePredCount.
 *
 * @fullsz:  size that is divided into (fullsz >> esz) elements
 * @pattern: pattern code
 * @esz:     log2 of the element size
 *
 * Pattern summary:
 *   0x00        POW2   largest power of two not above the element count
 *   0x01..0x08  VL1..VL8     exactly that many elements, if available
 *   0x09..0x0d  VL16..VL256  exactly that many elements, if available
 *   0x1d        MUL4   element count rounded down to a multiple of 4
 *   0x1e        MUL3   element count rounded down to a multiple of 3
 *   0x1f        ALL    every element
 *   otherwise   #uimm5 no elements
 * A fixed-count pattern that asks for more elements than exist yields 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned avail = fullsz >> esz;
    unsigned want;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(avail);
    case 0x1d: /* MUL4 */
        return avail - avail % 4;
    case 0x1e: /* MUL3 */
        return avail - avail % 3;
    case 0x1f: /* ALL */
        return avail;
    default:
        break;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8 */
        want = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        want = 16 << (pattern - 9);
    } else {
        /* #uimm5 */
        return 0;
    }
    return avail >= want ? want : 0;
}
1366
1367/* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1371 */
1372static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1373{
1374 if (!sve_access_check(s)) {
1375 return true;
1376 }
1377
1378 unsigned fullsz = vec_full_reg_size(s);
1379 unsigned ofs = pred_full_reg_offset(s, rd);
1380 unsigned numelem, setsz, i;
1381 uint64_t word, lastword;
1382 TCGv_i64 t;
1383
1384 numelem = decode_pred_count(fullsz, pat, esz);
1385
1386 /* Determine what we must store into each bit, and how many. */
1387 if (numelem == 0) {
1388 lastword = word = 0;
1389 setsz = fullsz;
1390 } else {
1391 setsz = numelem << esz;
1392 lastword = word = pred_esz_masks[esz];
1393 if (setsz % 64) {
973558a3 1394 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1395 }
1396 }
1397
1398 t = tcg_temp_new_i64();
1399 if (fullsz <= 64) {
1400 tcg_gen_movi_i64(t, lastword);
1401 tcg_gen_st_i64(t, cpu_env, ofs);
1402 goto done;
1403 }
1404
1405 if (word == lastword) {
1406 unsigned maxsz = size_for_gvec(fullsz / 8);
1407 unsigned oprsz = size_for_gvec(setsz / 8);
1408
1409 if (oprsz * 8 == setsz) {
8711e71f 1410 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
028e2a7b
RH
1411 goto done;
1412 }
028e2a7b
RH
1413 }
1414
1415 setsz /= 8;
1416 fullsz /= 8;
1417
1418 tcg_gen_movi_i64(t, word);
973558a3 1419 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1420 tcg_gen_st_i64(t, cpu_env, ofs + i);
1421 }
1422 if (lastword != word) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs + i);
1425 i += 8;
1426 }
1427 if (i < fullsz) {
1428 tcg_gen_movi_i64(t, 0);
1429 for (; i < fullsz; i += 8) {
1430 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 }
1432 }
1433
1434 done:
1435 tcg_temp_free_i64(t);
1436
1437 /* PTRUES */
1438 if (setflag) {
1439 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1440 tcg_gen_movi_i32(cpu_CF, word == 0);
1441 tcg_gen_movi_i32(cpu_VF, 0);
1442 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1443 }
1444 return true;
1445}
1446
3a7be554 1447static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
028e2a7b
RH
1448{
1449 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1450}
1451
3a7be554 1452static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
028e2a7b
RH
1453{
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1456}
1457
3a7be554 1458static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
028e2a7b
RH
1459{
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s, 0, a->rd, 32, false);
1462}
1463
3a7be554 1464static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
028e2a7b
RH
1465{
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1468 */
1469 arg_rprr_s alt_a = {
1470 .rd = a->rd, .pg = a->pg, .s = a->s,
1471 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1472 };
3a7be554 1473 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1474}
1475
3a7be554 1476static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
028e2a7b
RH
1477{
1478 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1479}
1480
3a7be554 1481static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
028e2a7b
RH
1482{
1483 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1484}
1485
1486static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1487 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1488 TCGv_ptr, TCGv_i32))
1489{
1490 if (!sve_access_check(s)) {
1491 return true;
1492 }
1493
1494 TCGv_ptr t_pd = tcg_temp_new_ptr();
1495 TCGv_ptr t_pg = tcg_temp_new_ptr();
1496 TCGv_i32 t;
86300b5d 1497 unsigned desc = 0;
028e2a7b 1498
86300b5d
RH
1499 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1500 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1501
1502 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1504 t = tcg_const_i32(desc);
1505
1506 gen_fn(t, t_pd, t_pg, t);
1507 tcg_temp_free_ptr(t_pd);
1508 tcg_temp_free_ptr(t_pg);
1509
1510 do_pred_flags(t);
1511 tcg_temp_free_i32(t);
1512 return true;
1513}
1514
3a7be554 1515static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1516{
1517 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1518}
1519
3a7be554 1520static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1521{
1522 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523}
1524
24e82e68
RH
1525/*
1526 *** SVE Element Count Group
1527 */
1528
1529/* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisions we must perform to bound the result.
1532 */
1533static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1534{
1535 int64_t ibound;
1536 TCGv_i64 bound;
1537 TCGCond cond;
1538
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1540 if (u) {
1541 tcg_gen_ext32u_i64(reg, reg);
1542 } else {
1543 tcg_gen_ext32s_i64(reg, reg);
1544 }
1545 if (d) {
1546 tcg_gen_sub_i64(reg, reg, val);
1547 ibound = (u ? 0 : INT32_MIN);
1548 cond = TCG_COND_LT;
1549 } else {
1550 tcg_gen_add_i64(reg, reg, val);
1551 ibound = (u ? UINT32_MAX : INT32_MAX);
1552 cond = TCG_COND_GT;
1553 }
1554 bound = tcg_const_i64(ibound);
1555 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1556 tcg_temp_free_i64(bound);
1557}
1558
1559/* Similarly with 64-bit values. */
1560static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1561{
1562 TCGv_i64 t0 = tcg_temp_new_i64();
1563 TCGv_i64 t1 = tcg_temp_new_i64();
1564 TCGv_i64 t2;
1565
1566 if (u) {
1567 if (d) {
1568 tcg_gen_sub_i64(t0, reg, val);
1569 tcg_gen_movi_i64(t1, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1571 } else {
1572 tcg_gen_add_i64(t0, reg, val);
1573 tcg_gen_movi_i64(t1, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1575 }
1576 } else {
1577 if (d) {
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0, reg, val);
1580 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1581 tcg_gen_xor_i64(reg, reg, t1);
24e82e68
RH
1582 tcg_gen_and_i64(t0, t0, reg);
1583
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg, INT64_MIN);
1586 t2 = tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1588 } else {
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0, reg, val);
1591 tcg_gen_add_i64(reg, reg, val);
1592 tcg_gen_xor_i64(t1, reg, val);
1593 tcg_gen_andc_i64(t0, t1, t0);
1594
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1, INT64_MAX);
1597 t2 = tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1599 }
1600 tcg_temp_free_i64(t2);
1601 }
1602 tcg_temp_free_i64(t0);
1603 tcg_temp_free_i64(t1);
1604}
1605
1606/* Similarly with a vector and a scalar operand. */
1607static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1608 TCGv_i64 val, bool u, bool d)
1609{
1610 unsigned vsz = vec_full_reg_size(s);
1611 TCGv_ptr dptr, nptr;
1612 TCGv_i32 t32, desc;
1613 TCGv_i64 t64;
1614
1615 dptr = tcg_temp_new_ptr();
1616 nptr = tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1618 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1619 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1620
1621 switch (esz) {
1622 case MO_8:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1627 }
1628 if (u) {
1629 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1632 }
1633 tcg_temp_free_i32(t32);
1634 break;
1635
1636 case MO_16:
1637 t32 = tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32, val);
1639 if (d) {
1640 tcg_gen_neg_i32(t32, t32);
1641 }
1642 if (u) {
1643 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1644 } else {
1645 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1646 }
1647 tcg_temp_free_i32(t32);
1648 break;
1649
1650 case MO_32:
1651 t64 = tcg_temp_new_i64();
1652 if (d) {
1653 tcg_gen_neg_i64(t64, val);
1654 } else {
1655 tcg_gen_mov_i64(t64, val);
1656 }
1657 if (u) {
1658 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1661 }
1662 tcg_temp_free_i64(t64);
1663 break;
1664
1665 case MO_64:
1666 if (u) {
1667 if (d) {
1668 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1669 } else {
1670 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1671 }
1672 } else if (d) {
1673 t64 = tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64, val);
1675 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1676 tcg_temp_free_i64(t64);
1677 } else {
1678 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1679 }
1680 break;
1681
1682 default:
1683 g_assert_not_reached();
1684 }
1685
1686 tcg_temp_free_ptr(dptr);
1687 tcg_temp_free_ptr(nptr);
1688 tcg_temp_free_i32(desc);
1689}
1690
3a7be554 1691static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1692{
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1697 }
1698 return true;
1699}
1700
3a7be554 1701static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1702{
1703 if (sve_access_check(s)) {
1704 unsigned fullsz = vec_full_reg_size(s);
1705 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1706 int inc = numelem * a->imm * (a->d ? -1 : 1);
1707 TCGv_i64 reg = cpu_reg(s, a->rd);
1708
1709 tcg_gen_addi_i64(reg, reg, inc);
1710 }
1711 return true;
1712}
1713
3a7be554 1714static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1715{
1716 if (!sve_access_check(s)) {
1717 return true;
1718 }
1719
1720 unsigned fullsz = vec_full_reg_size(s);
1721 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1722 int inc = numelem * a->imm;
1723 TCGv_i64 reg = cpu_reg(s, a->rd);
1724
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1726 if (inc == 0) {
1727 if (a->u) {
1728 tcg_gen_ext32u_i64(reg, reg);
1729 } else {
1730 tcg_gen_ext32s_i64(reg, reg);
1731 }
1732 } else {
1733 TCGv_i64 t = tcg_const_i64(inc);
1734 do_sat_addsub_32(reg, t, a->u, a->d);
1735 tcg_temp_free_i64(t);
1736 }
1737 return true;
1738}
1739
3a7be554 1740static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1741{
1742 if (!sve_access_check(s)) {
1743 return true;
1744 }
1745
1746 unsigned fullsz = vec_full_reg_size(s);
1747 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1748 int inc = numelem * a->imm;
1749 TCGv_i64 reg = cpu_reg(s, a->rd);
1750
1751 if (inc != 0) {
1752 TCGv_i64 t = tcg_const_i64(inc);
1753 do_sat_addsub_64(reg, t, a->u, a->d);
1754 tcg_temp_free_i64(t);
1755 }
1756 return true;
1757}
1758
3a7be554 1759static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1760{
1761 if (a->esz == 0) {
1762 return false;
1763 }
1764
1765 unsigned fullsz = vec_full_reg_size(s);
1766 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1767 int inc = numelem * a->imm;
1768
1769 if (inc != 0) {
1770 if (sve_access_check(s)) {
1771 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1772 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1773 vec_full_reg_offset(s, a->rn),
1774 t, fullsz, fullsz);
1775 tcg_temp_free_i64(t);
1776 }
1777 } else {
1778 do_mov_z(s, a->rd, a->rn);
1779 }
1780 return true;
1781}
1782
3a7be554 1783static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1784{
1785 if (a->esz == 0) {
1786 return false;
1787 }
1788
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1792
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1798 }
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1801 }
1802 return true;
1803}
1804
e1fa1164
RH
1805/*
1806 *** SVE Bitwise Immediate Group
1807 */
1808
1809static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1810{
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1816 }
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1821 }
1822 return true;
1823}
1824
3a7be554 1825static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1826{
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1828}
1829
3a7be554 1830static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1831{
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1833}
1834
3a7be554 1835static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1836{
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1838}
1839
3a7be554 1840static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
1841{
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1847 }
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1850 }
1851 return true;
1852}
1853
f25a2361
RH
1854/*
1855 *** SVE Integer Wide Immediate - Predicated Group
1856 */
1857
1858/* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1860 */
1861static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1863{
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1868 };
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1874
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1878
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1880
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1885}
1886
3a7be554 1887static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
1888{
1889 if (a->esz == 0) {
1890 return false;
1891 }
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1898 }
1899 return true;
1900}
1901
3a7be554 1902static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 1903{
3a7be554 1904 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1905 return false;
1906 }
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1911 }
1912 return true;
1913}
1914
3a7be554 1915static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
1916{
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1920 };
1921
3a7be554 1922 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1923 return false;
1924 }
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1932 }
1933 return true;
1934}
1935
b94f8f60
RH
1936/*
1937 *** SVE Permute Extract Group
1938 */
1939
3a7be554 1940static bool trans_EXT(DisasContext *s, arg_EXT *a)
b94f8f60
RH
1941{
1942 if (!sve_access_check(s)) {
1943 return true;
1944 }
1945
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1952
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1955 */
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1963 }
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1966 }
1967 return true;
1968}
1969
30562ab7
RH
1970/*
1971 *** SVE Permute - Unpredicated Group
1972 */
1973
3a7be554 1974static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
1975{
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1980 }
1981 return true;
1982}
1983
3a7be554 1984static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
1985{
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1988 }
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1993
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1996
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
7e17d50e
RH
2001 /*
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2004 */
2005 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2006 }
2007 }
2008 return true;
2009}
2010
2011static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2012{
2013 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2014 static gen_insr * const fns[4] = {
2015 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2016 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2017 };
2018 unsigned vsz = vec_full_reg_size(s);
2019 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2020 TCGv_ptr t_zd = tcg_temp_new_ptr();
2021 TCGv_ptr t_zn = tcg_temp_new_ptr();
2022
2023 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2024 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2025
2026 fns[a->esz](t_zd, t_zn, val, desc);
2027
2028 tcg_temp_free_ptr(t_zd);
2029 tcg_temp_free_ptr(t_zn);
2030 tcg_temp_free_i32(desc);
2031}
2032
3a7be554 2033static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2034{
2035 if (sve_access_check(s)) {
2036 TCGv_i64 t = tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2038 do_insr_i64(s, a, t);
2039 tcg_temp_free_i64(t);
2040 }
2041 return true;
2042}
2043
3a7be554 2044static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2045{
2046 if (sve_access_check(s)) {
2047 do_insr_i64(s, a, cpu_reg(s, a->rm));
2048 }
2049 return true;
2050}
2051
3a7be554 2052static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2053{
2054 static gen_helper_gvec_2 * const fns[4] = {
2055 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2056 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2057 };
2058
2059 if (sve_access_check(s)) {
40e32e5a 2060 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2061 }
2062 return true;
2063}
2064
3a7be554 2065static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2066{
2067 static gen_helper_gvec_3 * const fns[4] = {
2068 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2069 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2070 };
2071
2072 if (sve_access_check(s)) {
e645d1a1 2073 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2074 }
2075 return true;
2076}
2077
3a7be554 2078static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2079{
2080 static gen_helper_gvec_2 * const fns[4][2] = {
2081 { NULL, NULL },
2082 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2083 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2084 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2085 };
2086
2087 if (a->esz == 0) {
2088 return false;
2089 }
2090 if (sve_access_check(s)) {
2091 unsigned vsz = vec_full_reg_size(s);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2093 vec_full_reg_offset(s, a->rn)
2094 + (a->h ? vsz / 2 : 0),
2095 vsz, vsz, 0, fns[a->esz][a->u]);
2096 }
2097 return true;
2098}
2099
d731d8cb
RH
2100/*
2101 *** SVE Permute - Predicates Group
2102 */
2103
2104static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2105 gen_helper_gvec_3 *fn)
2106{
2107 if (!sve_access_check(s)) {
2108 return true;
2109 }
2110
2111 unsigned vsz = pred_full_reg_size(s);
2112
2113 /* Predicate sizes may be smaller and cannot use simd_desc.
2114 We cannot round up, as we do elsewhere, because we need
2115 the exact size for ZIP2 and REV. We retain the style for
2116 the other helpers for consistency. */
2117 TCGv_ptr t_d = tcg_temp_new_ptr();
2118 TCGv_ptr t_n = tcg_temp_new_ptr();
2119 TCGv_ptr t_m = tcg_temp_new_ptr();
2120 TCGv_i32 t_desc;
2121 int desc;
2122
2123 desc = vsz - 2;
2124 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2125 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2126
2127 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2128 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2129 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2130 t_desc = tcg_const_i32(desc);
2131
2132 fn(t_d, t_n, t_m, t_desc);
2133
2134 tcg_temp_free_ptr(t_d);
2135 tcg_temp_free_ptr(t_n);
2136 tcg_temp_free_ptr(t_m);
2137 tcg_temp_free_i32(t_desc);
2138 return true;
2139}
2140
2141static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2142 gen_helper_gvec_2 *fn)
2143{
2144 if (!sve_access_check(s)) {
2145 return true;
2146 }
2147
2148 unsigned vsz = pred_full_reg_size(s);
2149 TCGv_ptr t_d = tcg_temp_new_ptr();
2150 TCGv_ptr t_n = tcg_temp_new_ptr();
2151 TCGv_i32 t_desc;
2152 int desc;
2153
2154 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2155 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2156
2157 /* Predicate sizes may be smaller and cannot use simd_desc.
2158 We cannot round up, as we do elsewhere, because we need
2159 the exact size for ZIP2 and REV. We retain the style for
2160 the other helpers for consistency. */
2161
2162 desc = vsz - 2;
2163 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2164 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2165 t_desc = tcg_const_i32(desc);
2166
2167 fn(t_d, t_n, t_desc);
2168
2169 tcg_temp_free_i32(t_desc);
2170 tcg_temp_free_ptr(t_d);
2171 tcg_temp_free_ptr(t_n);
2172 return true;
2173}
2174
3a7be554 2175static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2176{
2177 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2178}
2179
3a7be554 2180static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2181{
2182 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2183}
2184
3a7be554 2185static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2186{
2187 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2188}
2189
3a7be554 2190static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2191{
2192 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2193}
2194
3a7be554 2195static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2196{
2197 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2198}
2199
3a7be554 2200static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2201{
2202 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2203}
2204
3a7be554 2205static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2206{
2207 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2208}
2209
3a7be554 2210static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2211{
2212 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2213}
2214
3a7be554 2215static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2216{
2217 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2218}
2219
234b48e9
RH
2220/*
2221 *** SVE Permute - Interleaving Group
2222 */
2223
2224static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2225{
2226 static gen_helper_gvec_3 * const fns[4] = {
2227 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2228 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2229 };
2230
2231 if (sve_access_check(s)) {
2232 unsigned vsz = vec_full_reg_size(s);
2233 unsigned high_ofs = high ? vsz / 2 : 0;
2234 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2235 vec_full_reg_offset(s, a->rn) + high_ofs,
2236 vec_full_reg_offset(s, a->rm) + high_ofs,
2237 vsz, vsz, 0, fns[a->esz]);
2238 }
2239 return true;
2240}
2241
2242static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2243 gen_helper_gvec_3 *fn)
2244{
2245 if (sve_access_check(s)) {
e645d1a1 2246 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2247 }
2248 return true;
2249}
2250
3a7be554 2251static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2252{
2253 return do_zip(s, a, false);
2254}
2255
3a7be554 2256static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2257{
2258 return do_zip(s, a, true);
2259}
2260
2261static gen_helper_gvec_3 * const uzp_fns[4] = {
2262 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2263 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2264};
2265
3a7be554 2266static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2267{
2268 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2269}
2270
3a7be554 2271static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2272{
2273 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2274}
2275
2276static gen_helper_gvec_3 * const trn_fns[4] = {
2277 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2278 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2279};
2280
3a7be554 2281static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2282{
2283 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2284}
2285
3a7be554 2286static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2287{
2288 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2289}
2290
3ca879ae
RH
2291/*
2292 *** SVE Permute Vector - Predicated Group
2293 */
2294
3a7be554 2295static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2296{
2297 static gen_helper_gvec_3 * const fns[4] = {
2298 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2299 };
2300 return do_zpz_ool(s, a, fns[a->esz]);
2301}
2302
ef23cb72
RH
2303/* Call the helper that computes the ARM LastActiveElement pseudocode
2304 * function, scaled by the element size. This includes the not found
2305 * indication; e.g. not found for esz=3 is -8.
2306 */
2307static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2308{
2309 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2310 * round up, as we do elsewhere, because we need the exact size.
2311 */
2312 TCGv_ptr t_p = tcg_temp_new_ptr();
2313 TCGv_i32 t_desc;
2314 unsigned vsz = pred_full_reg_size(s);
2315 unsigned desc;
2316
2317 desc = vsz - 2;
2318 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2319
2320 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2321 t_desc = tcg_const_i32(desc);
2322
2323 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2324
2325 tcg_temp_free_i32(t_desc);
2326 tcg_temp_free_ptr(t_p);
2327}
2328
2329/* Increment LAST to the offset of the next element in the vector,
2330 * wrapping around to 0.
2331 */
2332static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2333{
2334 unsigned vsz = vec_full_reg_size(s);
2335
2336 tcg_gen_addi_i32(last, last, 1 << esz);
2337 if (is_power_of_2(vsz)) {
2338 tcg_gen_andi_i32(last, last, vsz - 1);
2339 } else {
2340 TCGv_i32 max = tcg_const_i32(vsz);
2341 TCGv_i32 zero = tcg_const_i32(0);
2342 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2343 tcg_temp_free_i32(max);
2344 tcg_temp_free_i32(zero);
2345 }
2346}
2347
2348/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2349static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2350{
2351 unsigned vsz = vec_full_reg_size(s);
2352
2353 if (is_power_of_2(vsz)) {
2354 tcg_gen_andi_i32(last, last, vsz - 1);
2355 } else {
2356 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2357 TCGv_i32 zero = tcg_const_i32(0);
2358 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2359 tcg_temp_free_i32(max);
2360 tcg_temp_free_i32(zero);
2361 }
2362}
2363
2364/* Load an unsigned element of ESZ from BASE+OFS. */
2365static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2366{
2367 TCGv_i64 r = tcg_temp_new_i64();
2368
2369 switch (esz) {
2370 case 0:
2371 tcg_gen_ld8u_i64(r, base, ofs);
2372 break;
2373 case 1:
2374 tcg_gen_ld16u_i64(r, base, ofs);
2375 break;
2376 case 2:
2377 tcg_gen_ld32u_i64(r, base, ofs);
2378 break;
2379 case 3:
2380 tcg_gen_ld_i64(r, base, ofs);
2381 break;
2382 default:
2383 g_assert_not_reached();
2384 }
2385 return r;
2386}
2387
2388/* Load an unsigned element of ESZ from RM[LAST]. */
2389static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2390 int rm, int esz)
2391{
2392 TCGv_ptr p = tcg_temp_new_ptr();
2393 TCGv_i64 r;
2394
2395 /* Convert offset into vector into offset into ENV.
2396 * The final adjustment for the vector register base
2397 * is added via constant offset to the load.
2398 */
2399#ifdef HOST_WORDS_BIGENDIAN
2400 /* Adjust for element ordering. See vec_reg_offset. */
2401 if (esz < 3) {
2402 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2403 }
2404#endif
2405 tcg_gen_ext_i32_ptr(p, last);
2406 tcg_gen_add_ptr(p, p, cpu_env);
2407
2408 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2409 tcg_temp_free_ptr(p);
2410
2411 return r;
2412}
2413
2414/* Compute CLAST for a Zreg. */
2415static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2416{
2417 TCGv_i32 last;
2418 TCGLabel *over;
2419 TCGv_i64 ele;
2420 unsigned vsz, esz = a->esz;
2421
2422 if (!sve_access_check(s)) {
2423 return true;
2424 }
2425
2426 last = tcg_temp_local_new_i32();
2427 over = gen_new_label();
2428
2429 find_last_active(s, last, esz, a->pg);
2430
2431 /* There is of course no movcond for a 2048-bit vector,
2432 * so we must branch over the actual store.
2433 */
2434 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2435
2436 if (!before) {
2437 incr_last_active(s, last, esz);
2438 }
2439
2440 ele = load_last_active(s, last, a->rm, esz);
2441 tcg_temp_free_i32(last);
2442
2443 vsz = vec_full_reg_size(s);
2444 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2445 tcg_temp_free_i64(ele);
2446
2447 /* If this insn used MOVPRFX, we may need a second move. */
2448 if (a->rd != a->rn) {
2449 TCGLabel *done = gen_new_label();
2450 tcg_gen_br(done);
2451
2452 gen_set_label(over);
2453 do_mov_z(s, a->rd, a->rn);
2454
2455 gen_set_label(done);
2456 } else {
2457 gen_set_label(over);
2458 }
2459 return true;
2460}
2461
3a7be554 2462static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2463{
2464 return do_clast_vector(s, a, false);
2465}
2466
3a7be554 2467static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2468{
2469 return do_clast_vector(s, a, true);
2470}
2471
2472/* Compute CLAST for a scalar. */
2473static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2474 bool before, TCGv_i64 reg_val)
2475{
2476 TCGv_i32 last = tcg_temp_new_i32();
2477 TCGv_i64 ele, cmp, zero;
2478
2479 find_last_active(s, last, esz, pg);
2480
2481 /* Extend the original value of last prior to incrementing. */
2482 cmp = tcg_temp_new_i64();
2483 tcg_gen_ext_i32_i64(cmp, last);
2484
2485 if (!before) {
2486 incr_last_active(s, last, esz);
2487 }
2488
2489 /* The conceit here is that while last < 0 indicates not found, after
2490 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2491 * from which we can load garbage. We then discard the garbage with
2492 * a conditional move.
2493 */
2494 ele = load_last_active(s, last, rm, esz);
2495 tcg_temp_free_i32(last);
2496
2497 zero = tcg_const_i64(0);
2498 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2499
2500 tcg_temp_free_i64(zero);
2501 tcg_temp_free_i64(cmp);
2502 tcg_temp_free_i64(ele);
2503}
2504
2505/* Compute CLAST for a Vreg. */
2506static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2507{
2508 if (sve_access_check(s)) {
2509 int esz = a->esz;
2510 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2511 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2512
2513 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2514 write_fp_dreg(s, a->rd, reg);
2515 tcg_temp_free_i64(reg);
2516 }
2517 return true;
2518}
2519
3a7be554 2520static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2521{
2522 return do_clast_fp(s, a, false);
2523}
2524
3a7be554 2525static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2526{
2527 return do_clast_fp(s, a, true);
2528}
2529
2530/* Compute CLAST for a Xreg. */
2531static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2532{
2533 TCGv_i64 reg;
2534
2535 if (!sve_access_check(s)) {
2536 return true;
2537 }
2538
2539 reg = cpu_reg(s, a->rd);
2540 switch (a->esz) {
2541 case 0:
2542 tcg_gen_ext8u_i64(reg, reg);
2543 break;
2544 case 1:
2545 tcg_gen_ext16u_i64(reg, reg);
2546 break;
2547 case 2:
2548 tcg_gen_ext32u_i64(reg, reg);
2549 break;
2550 case 3:
2551 break;
2552 default:
2553 g_assert_not_reached();
2554 }
2555
2556 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2557 return true;
2558}
2559
3a7be554 2560static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2561{
2562 return do_clast_general(s, a, false);
2563}
2564
3a7be554 2565static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2566{
2567 return do_clast_general(s, a, true);
2568}
2569
2570/* Compute LAST for a scalar. */
2571static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2572 int pg, int rm, bool before)
2573{
2574 TCGv_i32 last = tcg_temp_new_i32();
2575 TCGv_i64 ret;
2576
2577 find_last_active(s, last, esz, pg);
2578 if (before) {
2579 wrap_last_active(s, last, esz);
2580 } else {
2581 incr_last_active(s, last, esz);
2582 }
2583
2584 ret = load_last_active(s, last, rm, esz);
2585 tcg_temp_free_i32(last);
2586 return ret;
2587}
2588
2589/* Compute LAST for a Vreg. */
2590static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2591{
2592 if (sve_access_check(s)) {
2593 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2594 write_fp_dreg(s, a->rd, val);
2595 tcg_temp_free_i64(val);
2596 }
2597 return true;
2598}
2599
3a7be554 2600static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2601{
2602 return do_last_fp(s, a, false);
2603}
2604
3a7be554 2605static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2606{
2607 return do_last_fp(s, a, true);
2608}
2609
2610/* Compute LAST for a Xreg. */
2611static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2612{
2613 if (sve_access_check(s)) {
2614 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2615 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2616 tcg_temp_free_i64(val);
2617 }
2618 return true;
2619}
2620
3a7be554 2621static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2622{
2623 return do_last_general(s, a, false);
2624}
2625
3a7be554 2626static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2627{
2628 return do_last_general(s, a, true);
2629}
2630
3a7be554 2631static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2632{
2633 if (sve_access_check(s)) {
2634 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2635 }
2636 return true;
2637}
2638
3a7be554 2639static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2640{
2641 if (sve_access_check(s)) {
2642 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2643 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2644 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2645 tcg_temp_free_i64(t);
2646 }
2647 return true;
2648}
2649
3a7be554 2650static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2651{
2652 static gen_helper_gvec_3 * const fns[4] = {
2653 NULL,
2654 gen_helper_sve_revb_h,
2655 gen_helper_sve_revb_s,
2656 gen_helper_sve_revb_d,
2657 };
2658 return do_zpz_ool(s, a, fns[a->esz]);
2659}
2660
3a7be554 2661static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2662{
2663 static gen_helper_gvec_3 * const fns[4] = {
2664 NULL,
2665 NULL,
2666 gen_helper_sve_revh_s,
2667 gen_helper_sve_revh_d,
2668 };
2669 return do_zpz_ool(s, a, fns[a->esz]);
2670}
2671
3a7be554 2672static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2673{
2674 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2675}
2676
3a7be554 2677static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2678{
2679 static gen_helper_gvec_3 * const fns[4] = {
2680 gen_helper_sve_rbit_b,
2681 gen_helper_sve_rbit_h,
2682 gen_helper_sve_rbit_s,
2683 gen_helper_sve_rbit_d,
2684 };
2685 return do_zpz_ool(s, a, fns[a->esz]);
2686}
2687
3a7be554 2688static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2689{
2690 if (sve_access_check(s)) {
36cbb7a8 2691 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 2692 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
2693 }
2694 return true;
2695}
2696
757f9cff
RH
2697/*
2698 *** SVE Integer Compare - Vectors Group
2699 */
2700
2701static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2702 gen_helper_gvec_flags_4 *gen_fn)
2703{
2704 TCGv_ptr pd, zn, zm, pg;
2705 unsigned vsz;
2706 TCGv_i32 t;
2707
2708 if (gen_fn == NULL) {
2709 return false;
2710 }
2711 if (!sve_access_check(s)) {
2712 return true;
2713 }
2714
2715 vsz = vec_full_reg_size(s);
2716 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2717 pd = tcg_temp_new_ptr();
2718 zn = tcg_temp_new_ptr();
2719 zm = tcg_temp_new_ptr();
2720 pg = tcg_temp_new_ptr();
2721
2722 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2723 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2724 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2725 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2726
2727 gen_fn(t, pd, zn, zm, pg, t);
2728
2729 tcg_temp_free_ptr(pd);
2730 tcg_temp_free_ptr(zn);
2731 tcg_temp_free_ptr(zm);
2732 tcg_temp_free_ptr(pg);
2733
2734 do_pred_flags(t);
2735
2736 tcg_temp_free_i32(t);
2737 return true;
2738}
2739
2740#define DO_PPZZ(NAME, name) \
3a7be554 2741static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2742{ \
2743 static gen_helper_gvec_flags_4 * const fns[4] = { \
2744 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2745 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2746 }; \
2747 return do_ppzz_flags(s, a, fns[a->esz]); \
2748}
2749
2750DO_PPZZ(CMPEQ, cmpeq)
2751DO_PPZZ(CMPNE, cmpne)
2752DO_PPZZ(CMPGT, cmpgt)
2753DO_PPZZ(CMPGE, cmpge)
2754DO_PPZZ(CMPHI, cmphi)
2755DO_PPZZ(CMPHS, cmphs)
2756
2757#undef DO_PPZZ
2758
2759#define DO_PPZW(NAME, name) \
3a7be554 2760static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2761{ \
2762 static gen_helper_gvec_flags_4 * const fns[4] = { \
2763 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2764 gen_helper_sve_##name##_ppzw_s, NULL \
2765 }; \
2766 return do_ppzz_flags(s, a, fns[a->esz]); \
2767}
2768
2769DO_PPZW(CMPEQ, cmpeq)
2770DO_PPZW(CMPNE, cmpne)
2771DO_PPZW(CMPGT, cmpgt)
2772DO_PPZW(CMPGE, cmpge)
2773DO_PPZW(CMPHI, cmphi)
2774DO_PPZW(CMPHS, cmphs)
2775DO_PPZW(CMPLT, cmplt)
2776DO_PPZW(CMPLE, cmple)
2777DO_PPZW(CMPLO, cmplo)
2778DO_PPZW(CMPLS, cmpls)
2779
2780#undef DO_PPZW
2781
38cadeba
RH
2782/*
2783 *** SVE Integer Compare - Immediate Groups
2784 */
2785
2786static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2787 gen_helper_gvec_flags_3 *gen_fn)
2788{
2789 TCGv_ptr pd, zn, pg;
2790 unsigned vsz;
2791 TCGv_i32 t;
2792
2793 if (gen_fn == NULL) {
2794 return false;
2795 }
2796 if (!sve_access_check(s)) {
2797 return true;
2798 }
2799
2800 vsz = vec_full_reg_size(s);
2801 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2802 pd = tcg_temp_new_ptr();
2803 zn = tcg_temp_new_ptr();
2804 pg = tcg_temp_new_ptr();
2805
2806 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2807 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2808 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2809
2810 gen_fn(t, pd, zn, pg, t);
2811
2812 tcg_temp_free_ptr(pd);
2813 tcg_temp_free_ptr(zn);
2814 tcg_temp_free_ptr(pg);
2815
2816 do_pred_flags(t);
2817
2818 tcg_temp_free_i32(t);
2819 return true;
2820}
2821
2822#define DO_PPZI(NAME, name) \
3a7be554 2823static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
2824{ \
2825 static gen_helper_gvec_flags_3 * const fns[4] = { \
2826 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2827 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2828 }; \
2829 return do_ppzi_flags(s, a, fns[a->esz]); \
2830}
2831
2832DO_PPZI(CMPEQ, cmpeq)
2833DO_PPZI(CMPNE, cmpne)
2834DO_PPZI(CMPGT, cmpgt)
2835DO_PPZI(CMPGE, cmpge)
2836DO_PPZI(CMPHI, cmphi)
2837DO_PPZI(CMPHS, cmphs)
2838DO_PPZI(CMPLT, cmplt)
2839DO_PPZI(CMPLE, cmple)
2840DO_PPZI(CMPLO, cmplo)
2841DO_PPZI(CMPLS, cmpls)
2842
2843#undef DO_PPZI
2844
35da316f
RH
2845/*
2846 *** SVE Partition Break Group
2847 */
2848
2849static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2850 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2851{
2852 if (!sve_access_check(s)) {
2853 return true;
2854 }
2855
2856 unsigned vsz = pred_full_reg_size(s);
2857
2858 /* Predicate sizes may be smaller and cannot use simd_desc. */
2859 TCGv_ptr d = tcg_temp_new_ptr();
2860 TCGv_ptr n = tcg_temp_new_ptr();
2861 TCGv_ptr m = tcg_temp_new_ptr();
2862 TCGv_ptr g = tcg_temp_new_ptr();
2863 TCGv_i32 t = tcg_const_i32(vsz - 2);
2864
2865 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2866 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2867 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2868 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2869
2870 if (a->s) {
2871 fn_s(t, d, n, m, g, t);
2872 do_pred_flags(t);
2873 } else {
2874 fn(d, n, m, g, t);
2875 }
2876 tcg_temp_free_ptr(d);
2877 tcg_temp_free_ptr(n);
2878 tcg_temp_free_ptr(m);
2879 tcg_temp_free_ptr(g);
2880 tcg_temp_free_i32(t);
2881 return true;
2882}
2883
2884static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2885 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2886{
2887 if (!sve_access_check(s)) {
2888 return true;
2889 }
2890
2891 unsigned vsz = pred_full_reg_size(s);
2892
2893 /* Predicate sizes may be smaller and cannot use simd_desc. */
2894 TCGv_ptr d = tcg_temp_new_ptr();
2895 TCGv_ptr n = tcg_temp_new_ptr();
2896 TCGv_ptr g = tcg_temp_new_ptr();
2897 TCGv_i32 t = tcg_const_i32(vsz - 2);
2898
2899 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2900 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2901 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2902
2903 if (a->s) {
2904 fn_s(t, d, n, g, t);
2905 do_pred_flags(t);
2906 } else {
2907 fn(d, n, g, t);
2908 }
2909 tcg_temp_free_ptr(d);
2910 tcg_temp_free_ptr(n);
2911 tcg_temp_free_ptr(g);
2912 tcg_temp_free_i32(t);
2913 return true;
2914}
2915
3a7be554 2916static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2917{
2918 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2919}
2920
3a7be554 2921static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2922{
2923 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2924}
2925
3a7be554 2926static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2927{
2928 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2929}
2930
3a7be554 2931static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2932{
2933 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2934}
2935
3a7be554 2936static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2937{
2938 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2939}
2940
3a7be554 2941static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2942{
2943 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2944}
2945
3a7be554 2946static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2947{
2948 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2949}
2950
9ee3a611
RH
2951/*
2952 *** SVE Predicate Count Group
2953 */
2954
2955static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2956{
2957 unsigned psz = pred_full_reg_size(s);
2958
2959 if (psz <= 8) {
2960 uint64_t psz_mask;
2961
2962 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2963 if (pn != pg) {
2964 TCGv_i64 g = tcg_temp_new_i64();
2965 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2966 tcg_gen_and_i64(val, val, g);
2967 tcg_temp_free_i64(g);
2968 }
2969
2970 /* Reduce the pred_esz_masks value simply to reduce the
2971 * size of the code generated here.
2972 */
2973 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2974 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2975
2976 tcg_gen_ctpop_i64(val, val);
2977 } else {
2978 TCGv_ptr t_pn = tcg_temp_new_ptr();
2979 TCGv_ptr t_pg = tcg_temp_new_ptr();
2980 unsigned desc;
2981 TCGv_i32 t_desc;
2982
2983 desc = psz - 2;
2984 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2985
2986 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2987 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2988 t_desc = tcg_const_i32(desc);
2989
2990 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2991 tcg_temp_free_ptr(t_pn);
2992 tcg_temp_free_ptr(t_pg);
2993 tcg_temp_free_i32(t_desc);
2994 }
2995}
2996
3a7be554 2997static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2998{
2999 if (sve_access_check(s)) {
3000 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3001 }
3002 return true;
3003}
3004
3a7be554 3005static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3006{
3007 if (sve_access_check(s)) {
3008 TCGv_i64 reg = cpu_reg(s, a->rd);
3009 TCGv_i64 val = tcg_temp_new_i64();
3010
3011 do_cntp(s, val, a->esz, a->pg, a->pg);
3012 if (a->d) {
3013 tcg_gen_sub_i64(reg, reg, val);
3014 } else {
3015 tcg_gen_add_i64(reg, reg, val);
3016 }
3017 tcg_temp_free_i64(val);
3018 }
3019 return true;
3020}
3021
3a7be554 3022static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3023{
3024 if (a->esz == 0) {
3025 return false;
3026 }
3027 if (sve_access_check(s)) {
3028 unsigned vsz = vec_full_reg_size(s);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3031
3032 do_cntp(s, val, a->esz, a->pg, a->pg);
3033 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3034 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3035 }
3036 return true;
3037}
3038
3a7be554 3039static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3040{
3041 if (sve_access_check(s)) {
3042 TCGv_i64 reg = cpu_reg(s, a->rd);
3043 TCGv_i64 val = tcg_temp_new_i64();
3044
3045 do_cntp(s, val, a->esz, a->pg, a->pg);
3046 do_sat_addsub_32(reg, val, a->u, a->d);
3047 }
3048 return true;
3049}
3050
3a7be554 3051static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3052{
3053 if (sve_access_check(s)) {
3054 TCGv_i64 reg = cpu_reg(s, a->rd);
3055 TCGv_i64 val = tcg_temp_new_i64();
3056
3057 do_cntp(s, val, a->esz, a->pg, a->pg);
3058 do_sat_addsub_64(reg, val, a->u, a->d);
3059 }
3060 return true;
3061}
3062
3a7be554 3063static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3064{
3065 if (a->esz == 0) {
3066 return false;
3067 }
3068 if (sve_access_check(s)) {
3069 TCGv_i64 val = tcg_temp_new_i64();
3070 do_cntp(s, val, a->esz, a->pg, a->pg);
3071 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3072 }
3073 return true;
3074}
3075
caf1cefc
RH
3076/*
3077 *** SVE Integer Compare Scalars Group
3078 */
3079
3a7be554 3080static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3081{
3082 if (!sve_access_check(s)) {
3083 return true;
3084 }
3085
3086 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3087 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3088 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3089 TCGv_i64 cmp = tcg_temp_new_i64();
3090
3091 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3092 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3093 tcg_temp_free_i64(cmp);
3094
3095 /* VF = !NF & !CF. */
3096 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3097 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3098
3099 /* Both NF and VF actually look at bit 31. */
3100 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3101 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3102 return true;
3103}
3104
3a7be554 3105static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3106{
bbd0968c 3107 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3108 TCGv_i32 t2, t3;
3109 TCGv_ptr ptr;
3110 unsigned desc, vsz = vec_full_reg_size(s);
3111 TCGCond cond;
3112
bbd0968c
RH
3113 if (!sve_access_check(s)) {
3114 return true;
3115 }
3116
3117 op0 = read_cpu_reg(s, a->rn, 1);
3118 op1 = read_cpu_reg(s, a->rm, 1);
3119
caf1cefc
RH
3120 if (!a->sf) {
3121 if (a->u) {
3122 tcg_gen_ext32u_i64(op0, op0);
3123 tcg_gen_ext32u_i64(op1, op1);
3124 } else {
3125 tcg_gen_ext32s_i64(op0, op0);
3126 tcg_gen_ext32s_i64(op1, op1);
3127 }
3128 }
3129
3130 /* For the helper, compress the different conditions into a computation
3131 * of how many iterations for which the condition is true.
caf1cefc 3132 */
bbd0968c
RH
3133 t0 = tcg_temp_new_i64();
3134 t1 = tcg_temp_new_i64();
caf1cefc
RH
3135 tcg_gen_sub_i64(t0, op1, op0);
3136
bbd0968c 3137 tmax = tcg_const_i64(vsz >> a->esz);
caf1cefc
RH
3138 if (a->eq) {
3139 /* Equality means one more iteration. */
3140 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c
RH
3141
3142 /* If op1 is max (un)signed integer (and the only time the addition
3143 * above could overflow), then we produce an all-true predicate by
3144 * setting the count to the vector length. This is because the
3145 * pseudocode is described as an increment + compare loop, and the
3146 * max integer would always compare true.
3147 */
3148 tcg_gen_movi_i64(t1, (a->sf
3149 ? (a->u ? UINT64_MAX : INT64_MAX)
3150 : (a->u ? UINT32_MAX : INT32_MAX)));
3151 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3152 }
3153
bbd0968c
RH
3154 /* Bound to the maximum. */
3155 tcg_gen_umin_i64(t0, t0, tmax);
3156 tcg_temp_free_i64(tmax);
3157
3158 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3159 cond = (a->u
3160 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3161 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3162 tcg_gen_movi_i64(t1, 0);
3163 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3164 tcg_temp_free_i64(t1);
caf1cefc 3165
bbd0968c 3166 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3167 t2 = tcg_temp_new_i32();
3168 tcg_gen_extrl_i64_i32(t2, t0);
3169 tcg_temp_free_i64(t0);
bbd0968c
RH
3170
3171 /* Scale elements to bits. */
3172 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc
RH
3173
3174 desc = (vsz / 8) - 2;
3175 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3176 t3 = tcg_const_i32(desc);
3177
3178 ptr = tcg_temp_new_ptr();
3179 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3180
3181 gen_helper_sve_while(t2, ptr, t2, t3);
3182 do_pred_flags(t2);
3183
3184 tcg_temp_free_ptr(ptr);
3185 tcg_temp_free_i32(t2);
3186 tcg_temp_free_i32(t3);
3187 return true;
3188}
3189
ed491961
RH
3190/*
3191 *** SVE Integer Wide Immediate - Unpredicated Group
3192 */
3193
3a7be554 3194static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3195{
3196 if (a->esz == 0) {
3197 return false;
3198 }
3199 if (sve_access_check(s)) {
3200 unsigned vsz = vec_full_reg_size(s);
3201 int dofs = vec_full_reg_offset(s, a->rd);
3202 uint64_t imm;
3203
3204 /* Decode the VFP immediate. */
3205 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3206 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3207 }
3208 return true;
3209}
3210
3a7be554 3211static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3212{
3a7be554 3213 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3214 return false;
3215 }
3216 if (sve_access_check(s)) {
3217 unsigned vsz = vec_full_reg_size(s);
3218 int dofs = vec_full_reg_offset(s, a->rd);
3219
8711e71f 3220 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3221 }
3222 return true;
3223}
3224
3a7be554 3225static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3226{
3a7be554 3227 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3228 return false;
3229 }
3230 if (sve_access_check(s)) {
3231 unsigned vsz = vec_full_reg_size(s);
3232 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3233 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3234 }
3235 return true;
3236}
3237
3a7be554 3238static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3239{
3240 a->imm = -a->imm;
3a7be554 3241 return trans_ADD_zzi(s, a);
6e6a157d
RH
3242}
3243
3a7be554 3244static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3245{
53229a77 3246 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3247 static const GVecGen2s op[4] = {
3248 { .fni8 = tcg_gen_vec_sub8_i64,
3249 .fniv = tcg_gen_sub_vec,
3250 .fno = gen_helper_sve_subri_b,
53229a77 3251 .opt_opc = vecop_list,
6e6a157d
RH
3252 .vece = MO_8,
3253 .scalar_first = true },
3254 { .fni8 = tcg_gen_vec_sub16_i64,
3255 .fniv = tcg_gen_sub_vec,
3256 .fno = gen_helper_sve_subri_h,
53229a77 3257 .opt_opc = vecop_list,
6e6a157d
RH
3258 .vece = MO_16,
3259 .scalar_first = true },
3260 { .fni4 = tcg_gen_sub_i32,
3261 .fniv = tcg_gen_sub_vec,
3262 .fno = gen_helper_sve_subri_s,
53229a77 3263 .opt_opc = vecop_list,
6e6a157d
RH
3264 .vece = MO_32,
3265 .scalar_first = true },
3266 { .fni8 = tcg_gen_sub_i64,
3267 .fniv = tcg_gen_sub_vec,
3268 .fno = gen_helper_sve_subri_d,
53229a77 3269 .opt_opc = vecop_list,
6e6a157d
RH
3270 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3271 .vece = MO_64,
3272 .scalar_first = true }
3273 };
3274
3a7be554 3275 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3276 return false;
3277 }
3278 if (sve_access_check(s)) {
3279 unsigned vsz = vec_full_reg_size(s);
3280 TCGv_i64 c = tcg_const_i64(a->imm);
3281 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3282 vec_full_reg_offset(s, a->rn),
3283 vsz, vsz, c, &op[a->esz]);
3284 tcg_temp_free_i64(c);
3285 }
3286 return true;
3287}
3288
3a7be554 3289static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3290{
3291 if (sve_access_check(s)) {
3292 unsigned vsz = vec_full_reg_size(s);
3293 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3294 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3295 }
3296 return true;
3297}
3298
3a7be554 3299static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3300{
3a7be554 3301 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3302 return false;
3303 }
3304 if (sve_access_check(s)) {
3305 TCGv_i64 val = tcg_const_i64(a->imm);
3306 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3307 tcg_temp_free_i64(val);
3308 }
3309 return true;
3310}
3311
3a7be554 3312static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3313{
3a7be554 3314 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3315}
3316
3a7be554 3317static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3318{
3a7be554 3319 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3320}
3321
3a7be554 3322static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3323{
3a7be554 3324 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3325}
3326
3a7be554 3327static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3328{
3a7be554 3329 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3330}
3331
3332static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3333{
3334 if (sve_access_check(s)) {
3335 unsigned vsz = vec_full_reg_size(s);
3336 TCGv_i64 c = tcg_const_i64(a->imm);
3337
3338 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3339 vec_full_reg_offset(s, a->rn),
3340 c, vsz, vsz, 0, fn);
3341 tcg_temp_free_i64(c);
3342 }
3343 return true;
3344}
3345
3346#define DO_ZZI(NAME, name) \
3a7be554 3347static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3348{ \
3349 static gen_helper_gvec_2i * const fns[4] = { \
3350 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3351 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3352 }; \
3353 return do_zzi_ool(s, a, fns[a->esz]); \
3354}
3355
3356DO_ZZI(SMAX, smax)
3357DO_ZZI(UMAX, umax)
3358DO_ZZI(SMIN, smin)
3359DO_ZZI(UMIN, umin)
3360
3361#undef DO_ZZI
3362
3a7be554 3363static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3364{
3365 static gen_helper_gvec_3 * const fns[2][2] = {
3366 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3367 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3368 };
3369
3370 if (sve_access_check(s)) {
e645d1a1 3371 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
d730ecaa
RH
3372 }
3373 return true;
3374}
3375
3a7be554 3376static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3377{
3378 static gen_helper_gvec_3 * const fns[2][2] = {
3379 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3380 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3381 };
3382
3383 if (sve_access_check(s)) {
e645d1a1 3384 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
16fcfdc7
RH
3385 }
3386 return true;
3387}
3388
3389
ca40a6e6
RH
3390/*
3391 *** SVE Floating Point Multiply-Add Indexed Group
3392 */
3393
3a7be554 3394static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3395{
3396 static gen_helper_gvec_4_ptr * const fns[3] = {
3397 gen_helper_gvec_fmla_idx_h,
3398 gen_helper_gvec_fmla_idx_s,
3399 gen_helper_gvec_fmla_idx_d,
3400 };
3401
3402 if (sve_access_check(s)) {
3403 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3404 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3405 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3406 vec_full_reg_offset(s, a->rn),
3407 vec_full_reg_offset(s, a->rm),
3408 vec_full_reg_offset(s, a->ra),
3409 status, vsz, vsz, (a->index << 1) | a->sub,
3410 fns[a->esz - 1]);
3411 tcg_temp_free_ptr(status);
3412 }
3413 return true;
3414}
3415
3416/*
3417 *** SVE Floating Point Multiply Indexed Group
3418 */
3419
3a7be554 3420static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3421{
3422 static gen_helper_gvec_3_ptr * const fns[3] = {
3423 gen_helper_gvec_fmul_idx_h,
3424 gen_helper_gvec_fmul_idx_s,
3425 gen_helper_gvec_fmul_idx_d,
3426 };
3427
3428 if (sve_access_check(s)) {
3429 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3430 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3431 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3432 vec_full_reg_offset(s, a->rn),
3433 vec_full_reg_offset(s, a->rm),
3434 status, vsz, vsz, a->index, fns[a->esz - 1]);
3435 tcg_temp_free_ptr(status);
3436 }
3437 return true;
3438}
3439
23fbe79f
RH
3440/*
3441 *** SVE Floating Point Fast Reduction Group
3442 */
3443
3444typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3445 TCGv_ptr, TCGv_i32);
3446
3447static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3448 gen_helper_fp_reduce *fn)
3449{
3450 unsigned vsz = vec_full_reg_size(s);
3451 unsigned p2vsz = pow2ceil(vsz);
3452 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3453 TCGv_ptr t_zn, t_pg, status;
3454 TCGv_i64 temp;
3455
3456 temp = tcg_temp_new_i64();
3457 t_zn = tcg_temp_new_ptr();
3458 t_pg = tcg_temp_new_ptr();
3459
3460 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3461 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3462 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3463
3464 fn(temp, t_zn, t_pg, status, t_desc);
3465 tcg_temp_free_ptr(t_zn);
3466 tcg_temp_free_ptr(t_pg);
3467 tcg_temp_free_ptr(status);
3468 tcg_temp_free_i32(t_desc);
3469
3470 write_fp_dreg(s, a->rd, temp);
3471 tcg_temp_free_i64(temp);
3472}
3473
3474#define DO_VPZ(NAME, name) \
3a7be554 3475static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3476{ \
3477 static gen_helper_fp_reduce * const fns[3] = { \
3478 gen_helper_sve_##name##_h, \
3479 gen_helper_sve_##name##_s, \
3480 gen_helper_sve_##name##_d, \
3481 }; \
3482 if (a->esz == 0) { \
3483 return false; \
3484 } \
3485 if (sve_access_check(s)) { \
3486 do_reduce(s, a, fns[a->esz - 1]); \
3487 } \
3488 return true; \
3489}
3490
3491DO_VPZ(FADDV, faddv)
3492DO_VPZ(FMINNMV, fminnmv)
3493DO_VPZ(FMAXNMV, fmaxnmv)
3494DO_VPZ(FMINV, fminv)
3495DO_VPZ(FMAXV, fmaxv)
3496
3887c038
RH
3497/*
3498 *** SVE Floating Point Unary Operations - Unpredicated Group
3499 */
3500
3501static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3502{
3503 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3504 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3505
3506 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3507 vec_full_reg_offset(s, a->rn),
3508 status, vsz, vsz, 0, fn);
3509 tcg_temp_free_ptr(status);
3510}
3511
3a7be554 3512static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3513{
3514 static gen_helper_gvec_2_ptr * const fns[3] = {
3515 gen_helper_gvec_frecpe_h,
3516 gen_helper_gvec_frecpe_s,
3517 gen_helper_gvec_frecpe_d,
3518 };
3519 if (a->esz == 0) {
3520 return false;
3521 }
3522 if (sve_access_check(s)) {
3523 do_zz_fp(s, a, fns[a->esz - 1]);
3524 }
3525 return true;
3526}
3527
3a7be554 3528static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3529{
3530 static gen_helper_gvec_2_ptr * const fns[3] = {
3531 gen_helper_gvec_frsqrte_h,
3532 gen_helper_gvec_frsqrte_s,
3533 gen_helper_gvec_frsqrte_d,
3534 };
3535 if (a->esz == 0) {
3536 return false;
3537 }
3538 if (sve_access_check(s)) {
3539 do_zz_fp(s, a, fns[a->esz - 1]);
3540 }
3541 return true;
3542}
3543
4d2e2a03
RH
3544/*
3545 *** SVE Floating Point Compare with Zero Group
3546 */
3547
3548static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3549 gen_helper_gvec_3_ptr *fn)
3550{
3551 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3552 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3553
3554 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3555 vec_full_reg_offset(s, a->rn),
3556 pred_full_reg_offset(s, a->pg),
3557 status, vsz, vsz, 0, fn);
3558 tcg_temp_free_ptr(status);
3559}
3560
3561#define DO_PPZ(NAME, name) \
3a7be554 3562static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3563{ \
3564 static gen_helper_gvec_3_ptr * const fns[3] = { \
3565 gen_helper_sve_##name##_h, \
3566 gen_helper_sve_##name##_s, \
3567 gen_helper_sve_##name##_d, \
3568 }; \
3569 if (a->esz == 0) { \
3570 return false; \
3571 } \
3572 if (sve_access_check(s)) { \
3573 do_ppz_fp(s, a, fns[a->esz - 1]); \
3574 } \
3575 return true; \
3576}
3577
3578DO_PPZ(FCMGE_ppz0, fcmge0)
3579DO_PPZ(FCMGT_ppz0, fcmgt0)
3580DO_PPZ(FCMLE_ppz0, fcmle0)
3581DO_PPZ(FCMLT_ppz0, fcmlt0)
3582DO_PPZ(FCMEQ_ppz0, fcmeq0)
3583DO_PPZ(FCMNE_ppz0, fcmne0)
3584
3585#undef DO_PPZ
3586
67fcd9ad
RH
3587/*
3588 *** SVE floating-point trig multiply-add coefficient
3589 */
3590
3a7be554 3591static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3592{
3593 static gen_helper_gvec_3_ptr * const fns[3] = {
3594 gen_helper_sve_ftmad_h,
3595 gen_helper_sve_ftmad_s,
3596 gen_helper_sve_ftmad_d,
3597 };
3598
3599 if (a->esz == 0) {
3600 return false;
3601 }
3602 if (sve_access_check(s)) {
3603 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3604 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3605 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3606 vec_full_reg_offset(s, a->rn),
3607 vec_full_reg_offset(s, a->rm),
3608 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3609 tcg_temp_free_ptr(status);
3610 }
3611 return true;
3612}
3613
7f9ddf64
RH
3614/*
3615 *** SVE Floating Point Accumulating Reduction Group
3616 */
3617
3a7be554 3618static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3619{
3620 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3621 TCGv_ptr, TCGv_ptr, TCGv_i32);
3622 static fadda_fn * const fns[3] = {
3623 gen_helper_sve_fadda_h,
3624 gen_helper_sve_fadda_s,
3625 gen_helper_sve_fadda_d,
3626 };
3627 unsigned vsz = vec_full_reg_size(s);
3628 TCGv_ptr t_rm, t_pg, t_fpst;
3629 TCGv_i64 t_val;
3630 TCGv_i32 t_desc;
3631
3632 if (a->esz == 0) {
3633 return false;
3634 }
3635 if (!sve_access_check(s)) {
3636 return true;
3637 }
3638
3639 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3640 t_rm = tcg_temp_new_ptr();
3641 t_pg = tcg_temp_new_ptr();
3642 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3643 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3644 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
3645 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3646
3647 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3648
3649 tcg_temp_free_i32(t_desc);
3650 tcg_temp_free_ptr(t_fpst);
3651 tcg_temp_free_ptr(t_pg);
3652 tcg_temp_free_ptr(t_rm);
3653
3654 write_fp_dreg(s, a->rd, t_val);
3655 tcg_temp_free_i64(t_val);
3656 return true;
3657}
3658
29b80469
RH
3659/*
3660 *** SVE Floating Point Arithmetic - Unpredicated Group
3661 */
3662
3663static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3664 gen_helper_gvec_3_ptr *fn)
3665{
3666 if (fn == NULL) {
3667 return false;
3668 }
3669 if (sve_access_check(s)) {
3670 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3671 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3672 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3673 vec_full_reg_offset(s, a->rn),
3674 vec_full_reg_offset(s, a->rm),
3675 status, vsz, vsz, 0, fn);
3676 tcg_temp_free_ptr(status);
3677 }
3678 return true;
3679}
3680
3681
3682#define DO_FP3(NAME, name) \
3a7be554 3683static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3684{ \
3685 static gen_helper_gvec_3_ptr * const fns[4] = { \
3686 NULL, gen_helper_gvec_##name##_h, \
3687 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3688 }; \
3689 return do_zzz_fp(s, a, fns[a->esz]); \
3690}
3691
3692DO_FP3(FADD_zzz, fadd)
3693DO_FP3(FSUB_zzz, fsub)
3694DO_FP3(FMUL_zzz, fmul)
3695DO_FP3(FTSMUL, ftsmul)
3696DO_FP3(FRECPS, recps)
3697DO_FP3(FRSQRTS, rsqrts)
3698
3699#undef DO_FP3
3700
ec3b87c2
RH
3701/*
3702 *** SVE Floating Point Arithmetic - Predicated Group
3703 */
3704
3705static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3706 gen_helper_gvec_4_ptr *fn)
3707{
3708 if (fn == NULL) {
3709 return false;
3710 }
3711 if (sve_access_check(s)) {
3712 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3713 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3714 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3715 vec_full_reg_offset(s, a->rn),
3716 vec_full_reg_offset(s, a->rm),
3717 pred_full_reg_offset(s, a->pg),
3718 status, vsz, vsz, 0, fn);
3719 tcg_temp_free_ptr(status);
3720 }
3721 return true;
3722}
3723
3724#define DO_FP3(NAME, name) \
3a7be554 3725static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3726{ \
3727 static gen_helper_gvec_4_ptr * const fns[4] = { \
3728 NULL, gen_helper_sve_##name##_h, \
3729 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3730 }; \
3731 return do_zpzz_fp(s, a, fns[a->esz]); \
3732}
3733
3734DO_FP3(FADD_zpzz, fadd)
3735DO_FP3(FSUB_zpzz, fsub)
3736DO_FP3(FMUL_zpzz, fmul)
3737DO_FP3(FMIN_zpzz, fmin)
3738DO_FP3(FMAX_zpzz, fmax)
3739DO_FP3(FMINNM_zpzz, fminnum)
3740DO_FP3(FMAXNM_zpzz, fmaxnum)
3741DO_FP3(FABD, fabd)
3742DO_FP3(FSCALE, fscalbn)
3743DO_FP3(FDIV, fdiv)
3744DO_FP3(FMULX, fmulx)
3745
3746#undef DO_FP3
8092c6a3 3747
cc48affe
RH
3748typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3749 TCGv_i64, TCGv_ptr, TCGv_i32);
3750
3751static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3752 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3753{
3754 unsigned vsz = vec_full_reg_size(s);
3755 TCGv_ptr t_zd, t_zn, t_pg, status;
3756 TCGv_i32 desc;
3757
3758 t_zd = tcg_temp_new_ptr();
3759 t_zn = tcg_temp_new_ptr();
3760 t_pg = tcg_temp_new_ptr();
3761 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3762 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3763 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3764
cdfb22bb 3765 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
3766 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3767 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3768
3769 tcg_temp_free_i32(desc);
3770 tcg_temp_free_ptr(status);
3771 tcg_temp_free_ptr(t_pg);
3772 tcg_temp_free_ptr(t_zn);
3773 tcg_temp_free_ptr(t_zd);
3774}
3775
3776static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3777 gen_helper_sve_fp2scalar *fn)
3778{
3779 TCGv_i64 temp = tcg_const_i64(imm);
3780 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3781 tcg_temp_free_i64(temp);
3782}
3783
3784#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3785static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3786{ \
3787 static gen_helper_sve_fp2scalar * const fns[3] = { \
3788 gen_helper_sve_##name##_h, \
3789 gen_helper_sve_##name##_s, \
3790 gen_helper_sve_##name##_d \
3791 }; \
3792 static uint64_t const val[3][2] = { \
3793 { float16_##const0, float16_##const1 }, \
3794 { float32_##const0, float32_##const1 }, \
3795 { float64_##const0, float64_##const1 }, \
3796 }; \
3797 if (a->esz == 0) { \
3798 return false; \
3799 } \
3800 if (sve_access_check(s)) { \
3801 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3802 } \
3803 return true; \
3804}
3805
cc48affe
RH
3806DO_FP_IMM(FADD, fadds, half, one)
3807DO_FP_IMM(FSUB, fsubs, half, one)
3808DO_FP_IMM(FMUL, fmuls, half, two)
3809DO_FP_IMM(FSUBR, fsubrs, half, one)
3810DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3811DO_FP_IMM(FMINNM, fminnms, zero, one)
3812DO_FP_IMM(FMAX, fmaxs, zero, one)
3813DO_FP_IMM(FMIN, fmins, zero, one)
3814
3815#undef DO_FP_IMM
3816
abfdefd5
RH
3817static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3818 gen_helper_gvec_4_ptr *fn)
3819{
3820 if (fn == NULL) {
3821 return false;
3822 }
3823 if (sve_access_check(s)) {
3824 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3825 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3826 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3827 vec_full_reg_offset(s, a->rn),
3828 vec_full_reg_offset(s, a->rm),
3829 pred_full_reg_offset(s, a->pg),
3830 status, vsz, vsz, 0, fn);
3831 tcg_temp_free_ptr(status);
3832 }
3833 return true;
3834}
3835
3836#define DO_FPCMP(NAME, name) \
3a7be554 3837static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3838{ \
3839 static gen_helper_gvec_4_ptr * const fns[4] = { \
3840 NULL, gen_helper_sve_##name##_h, \
3841 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3842 }; \
3843 return do_fp_cmp(s, a, fns[a->esz]); \
3844}
3845
3846DO_FPCMP(FCMGE, fcmge)
3847DO_FPCMP(FCMGT, fcmgt)
3848DO_FPCMP(FCMEQ, fcmeq)
3849DO_FPCMP(FCMNE, fcmne)
3850DO_FPCMP(FCMUO, fcmuo)
3851DO_FPCMP(FACGE, facge)
3852DO_FPCMP(FACGT, facgt)
3853
3854#undef DO_FPCMP
3855
3a7be554 3856static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3857{
3858 static gen_helper_gvec_4_ptr * const fns[3] = {
3859 gen_helper_sve_fcadd_h,
3860 gen_helper_sve_fcadd_s,
3861 gen_helper_sve_fcadd_d
3862 };
3863
3864 if (a->esz == 0) {
3865 return false;
3866 }
3867 if (sve_access_check(s)) {
3868 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3869 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3870 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3871 vec_full_reg_offset(s, a->rn),
3872 vec_full_reg_offset(s, a->rm),
3873 pred_full_reg_offset(s, a->pg),
3874 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3875 tcg_temp_free_ptr(status);
3876 }
3877 return true;
3878}
3879
08975da9
RH
3880static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3881 gen_helper_gvec_5_ptr *fn)
6ceabaad 3882{
08975da9 3883 if (a->esz == 0) {
6ceabaad
RH
3884 return false;
3885 }
08975da9
RH
3886 if (sve_access_check(s)) {
3887 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3888 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3889 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3890 vec_full_reg_offset(s, a->rn),
3891 vec_full_reg_offset(s, a->rm),
3892 vec_full_reg_offset(s, a->ra),
3893 pred_full_reg_offset(s, a->pg),
3894 status, vsz, vsz, 0, fn);
3895 tcg_temp_free_ptr(status);
6ceabaad 3896 }
6ceabaad
RH
3897 return true;
3898}
3899
3900#define DO_FMLA(NAME, name) \
3a7be554 3901static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 3902{ \
08975da9 3903 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
3904 NULL, gen_helper_sve_##name##_h, \
3905 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3906 }; \
3907 return do_fmla(s, a, fns[a->esz]); \
3908}
3909
3910DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3911DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3912DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3913DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3914
3915#undef DO_FMLA
3916
3a7be554 3917static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 3918{
08975da9
RH
3919 static gen_helper_gvec_5_ptr * const fns[4] = {
3920 NULL,
05f48bab
RH
3921 gen_helper_sve_fcmla_zpzzz_h,
3922 gen_helper_sve_fcmla_zpzzz_s,
3923 gen_helper_sve_fcmla_zpzzz_d,
3924 };
3925
3926 if (a->esz == 0) {
3927 return false;
3928 }
3929 if (sve_access_check(s)) {
3930 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3931 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3932 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3933 vec_full_reg_offset(s, a->rn),
3934 vec_full_reg_offset(s, a->rm),
3935 vec_full_reg_offset(s, a->ra),
3936 pred_full_reg_offset(s, a->pg),
3937 status, vsz, vsz, a->rot, fns[a->esz]);
3938 tcg_temp_free_ptr(status);
05f48bab
RH
3939 }
3940 return true;
3941}
3942
3a7be554 3943static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
3944{
3945 static gen_helper_gvec_3_ptr * const fns[2] = {
3946 gen_helper_gvec_fcmlah_idx,
3947 gen_helper_gvec_fcmlas_idx,
3948 };
3949
3950 tcg_debug_assert(a->esz == 1 || a->esz == 2);
3951 tcg_debug_assert(a->rd == a->ra);
3952 if (sve_access_check(s)) {
3953 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3954 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
3955 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3956 vec_full_reg_offset(s, a->rn),
3957 vec_full_reg_offset(s, a->rm),
3958 status, vsz, vsz,
3959 a->index * 4 + a->rot,
3960 fns[a->esz - 1]);
3961 tcg_temp_free_ptr(status);
3962 }
3963 return true;
3964}
3965
8092c6a3
RH
3966/*
3967 *** SVE Floating Point Unary Operations Predicated Group
3968 */
3969
3970static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3971 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3972{
3973 if (sve_access_check(s)) {
3974 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3975 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
3976 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3977 vec_full_reg_offset(s, rn),
3978 pred_full_reg_offset(s, pg),
3979 status, vsz, vsz, 0, fn);
3980 tcg_temp_free_ptr(status);
3981 }
3982 return true;
3983}
3984
3a7be554 3985static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3986{
e4ab5124 3987 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
3988}
3989
3a7be554 3990static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3991{
3992 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3993}
3994
3a7be554 3995static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3996{
e4ab5124 3997 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
3998}
3999
3a7be554 4000static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4001{
4002 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4003}
4004
3a7be554 4005static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4006{
4007 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4008}
4009
3a7be554 4010static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4011{
4012 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4013}
4014
3a7be554 4015static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4016{
4017 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4018}
4019
3a7be554 4020static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4021{
4022 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4023}
4024
3a7be554 4025static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4026{
4027 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4028}
4029
3a7be554 4030static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4031{
4032 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4033}
4034
3a7be554 4035static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4036{
4037 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4038}
4039
3a7be554 4040static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4041{
4042 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4043}
4044
3a7be554 4045static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4046{
4047 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4048}
4049
3a7be554 4050static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4051{
4052 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4053}
4054
3a7be554 4055static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4056{
4057 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4058}
4059
3a7be554 4060static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4061{
4062 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4063}
4064
3a7be554 4065static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4066{
4067 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4068}
4069
3a7be554 4070static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4071{
4072 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4073}
4074
3a7be554 4075static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4076{
4077 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4078}
4079
3a7be554 4080static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4081{
4082 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4083}
4084
cda3c753
RH
4085static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4086 gen_helper_sve_frint_h,
4087 gen_helper_sve_frint_s,
4088 gen_helper_sve_frint_d
4089};
4090
3a7be554 4091static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4092{
4093 if (a->esz == 0) {
4094 return false;
4095 }
4096 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4097 frint_fns[a->esz - 1]);
4098}
4099
3a7be554 4100static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4101{
4102 static gen_helper_gvec_3_ptr * const fns[3] = {
4103 gen_helper_sve_frintx_h,
4104 gen_helper_sve_frintx_s,
4105 gen_helper_sve_frintx_d
4106 };
4107 if (a->esz == 0) {
4108 return false;
4109 }
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4111}
4112
4113static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4114{
4115 if (a->esz == 0) {
4116 return false;
4117 }
4118 if (sve_access_check(s)) {
4119 unsigned vsz = vec_full_reg_size(s);
4120 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4121 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4122
4123 gen_helper_set_rmode(tmode, tmode, status);
4124
4125 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4126 vec_full_reg_offset(s, a->rn),
4127 pred_full_reg_offset(s, a->pg),
4128 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4129
4130 gen_helper_set_rmode(tmode, tmode, status);
4131 tcg_temp_free_i32(tmode);
4132 tcg_temp_free_ptr(status);
4133 }
4134 return true;
4135}
4136
3a7be554 4137static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4138{
4139 return do_frint_mode(s, a, float_round_nearest_even);
4140}
4141
3a7be554 4142static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4143{
4144 return do_frint_mode(s, a, float_round_up);
4145}
4146
3a7be554 4147static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4148{
4149 return do_frint_mode(s, a, float_round_down);
4150}
4151
3a7be554 4152static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4153{
4154 return do_frint_mode(s, a, float_round_to_zero);
4155}
4156
3a7be554 4157static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4158{
4159 return do_frint_mode(s, a, float_round_ties_away);
4160}
4161
3a7be554 4162static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4163{
4164 static gen_helper_gvec_3_ptr * const fns[3] = {
4165 gen_helper_sve_frecpx_h,
4166 gen_helper_sve_frecpx_s,
4167 gen_helper_sve_frecpx_d
4168 };
4169 if (a->esz == 0) {
4170 return false;
4171 }
4172 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4173}
4174
3a7be554 4175static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4176{
4177 static gen_helper_gvec_3_ptr * const fns[3] = {
4178 gen_helper_sve_fsqrt_h,
4179 gen_helper_sve_fsqrt_s,
4180 gen_helper_sve_fsqrt_d
4181 };
4182 if (a->esz == 0) {
4183 return false;
4184 }
4185 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4186}
4187
3a7be554 4188static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4189{
4190 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4191}
4192
3a7be554 4193static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4194{
4195 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4196}
4197
3a7be554 4198static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4199{
4200 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4201}
4202
3a7be554 4203static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4204{
4205 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4206}
4207
3a7be554 4208static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4209{
4210 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4211}
4212
3a7be554 4213static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4214{
4215 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4216}
4217
3a7be554 4218static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4219{
4220 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4221}
4222
3a7be554 4223static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4224{
4225 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4226}
4227
3a7be554 4228static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4229{
4230 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4231}
4232
3a7be554 4233static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4234{
4235 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4236}
4237
3a7be554 4238static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4239{
4240 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4241}
4242
3a7be554 4243static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4244{
4245 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4246}
4247
3a7be554 4248static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4249{
4250 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4251}
4252
3a7be554 4253static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4254{
4255 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4256}
4257
d1822297
RH
4258/*
4259 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4260 */
4261
4262/* Subroutine loading a vector register at VOFS of LEN bytes.
4263 * The load should begin at the address Rn + IMM.
4264 */
4265
19f2acc9 4266static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4267{
19f2acc9
RH
4268 int len_align = QEMU_ALIGN_DOWN(len, 8);
4269 int len_remain = len % 8;
4270 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4271 int midx = get_mem_index(s);
b2aa8879 4272 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4273
b2aa8879
RH
4274 dirty_addr = tcg_temp_new_i64();
4275 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4276 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4277 tcg_temp_free_i64(dirty_addr);
d1822297 4278
b2aa8879
RH
4279 /*
4280 * Note that unpredicated load/store of vector/predicate registers
d1822297 4281 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4282 * operations on larger quantities.
d1822297
RH
4283 * Attempt to keep code expansion to a minimum by limiting the
4284 * amount of unrolling done.
4285 */
4286 if (nparts <= 4) {
4287 int i;
4288
b2aa8879 4289 t0 = tcg_temp_new_i64();
d1822297 4290 for (i = 0; i < len_align; i += 8) {
b2aa8879 4291 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4292 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4293 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4294 }
b2aa8879 4295 tcg_temp_free_i64(t0);
d1822297
RH
4296 } else {
4297 TCGLabel *loop = gen_new_label();
4298 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4299
b2aa8879
RH
4300 /* Copy the clean address into a local temp, live across the loop. */
4301 t0 = clean_addr;
4b4dc975 4302 clean_addr = new_tmp_a64_local(s);
b2aa8879 4303 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4304
b2aa8879 4305 gen_set_label(loop);
d1822297 4306
b2aa8879
RH
4307 t0 = tcg_temp_new_i64();
4308 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4309 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4310
b2aa8879 4311 tp = tcg_temp_new_ptr();
d1822297
RH
4312 tcg_gen_add_ptr(tp, cpu_env, i);
4313 tcg_gen_addi_ptr(i, i, 8);
4314 tcg_gen_st_i64(t0, tp, vofs);
4315 tcg_temp_free_ptr(tp);
b2aa8879 4316 tcg_temp_free_i64(t0);
d1822297
RH
4317
4318 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4319 tcg_temp_free_ptr(i);
4320 }
4321
b2aa8879
RH
4322 /*
4323 * Predicate register loads can be any multiple of 2.
d1822297
RH
4324 * Note that we still store the entire 64-bit unit into cpu_env.
4325 */
4326 if (len_remain) {
b2aa8879 4327 t0 = tcg_temp_new_i64();
d1822297
RH
4328 switch (len_remain) {
4329 case 2:
4330 case 4:
4331 case 8:
b2aa8879
RH
4332 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4333 MO_LE | ctz32(len_remain));
d1822297
RH
4334 break;
4335
4336 case 6:
4337 t1 = tcg_temp_new_i64();
b2aa8879
RH
4338 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4339 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4340 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4341 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4342 tcg_temp_free_i64(t1);
4343 break;
4344
4345 default:
4346 g_assert_not_reached();
4347 }
4348 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4349 tcg_temp_free_i64(t0);
d1822297 4350 }
d1822297
RH
4351}
4352
5047c204 4353/* Similarly for stores. */
19f2acc9 4354static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4355{
19f2acc9
RH
4356 int len_align = QEMU_ALIGN_DOWN(len, 8);
4357 int len_remain = len % 8;
4358 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4359 int midx = get_mem_index(s);
bba87d0a 4360 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4361
bba87d0a
RH
4362 dirty_addr = tcg_temp_new_i64();
4363 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4364 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4365 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4366
4367 /* Note that unpredicated load/store of vector/predicate registers
4368 * are defined as a stream of bytes, which equates to little-endian
4369 * operations on larger quantities. There is no nice way to force
4370 * a little-endian store for aarch64_be-linux-user out of line.
4371 *
4372 * Attempt to keep code expansion to a minimum by limiting the
4373 * amount of unrolling done.
4374 */
4375 if (nparts <= 4) {
4376 int i;
4377
bba87d0a 4378 t0 = tcg_temp_new_i64();
5047c204
RH
4379 for (i = 0; i < len_align; i += 8) {
4380 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a 4381 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
d8227b09 4382 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4383 }
bba87d0a 4384 tcg_temp_free_i64(t0);
5047c204
RH
4385 } else {
4386 TCGLabel *loop = gen_new_label();
bba87d0a 4387 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4388
bba87d0a
RH
4389 /* Copy the clean address into a local temp, live across the loop. */
4390 t0 = clean_addr;
4b4dc975 4391 clean_addr = new_tmp_a64_local(s);
bba87d0a 4392 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4393
bba87d0a 4394 gen_set_label(loop);
5047c204 4395
bba87d0a
RH
4396 t0 = tcg_temp_new_i64();
4397 tp = tcg_temp_new_ptr();
4398 tcg_gen_add_ptr(tp, cpu_env, i);
4399 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4400 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4401 tcg_temp_free_ptr(tp);
4402
4403 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4404 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4405 tcg_temp_free_i64(t0);
5047c204
RH
4406
4407 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4408 tcg_temp_free_ptr(i);
4409 }
4410
4411 /* Predicate register stores can be any multiple of 2. */
4412 if (len_remain) {
bba87d0a 4413 t0 = tcg_temp_new_i64();
5047c204 4414 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4415
4416 switch (len_remain) {
4417 case 2:
4418 case 4:
4419 case 8:
bba87d0a
RH
4420 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4421 MO_LE | ctz32(len_remain));
5047c204
RH
4422 break;
4423
4424 case 6:
bba87d0a
RH
4425 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4426 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4427 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4428 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4429 break;
4430
4431 default:
4432 g_assert_not_reached();
4433 }
bba87d0a 4434 tcg_temp_free_i64(t0);
5047c204 4435 }
5047c204
RH
4436}
4437
3a7be554 4438static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4439{
4440 if (sve_access_check(s)) {
4441 int size = vec_full_reg_size(s);
4442 int off = vec_full_reg_offset(s, a->rd);
4443 do_ldr(s, off, size, a->rn, a->imm * size);
4444 }
4445 return true;
4446}
4447
3a7be554 4448static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4449{
4450 if (sve_access_check(s)) {
4451 int size = pred_full_reg_size(s);
4452 int off = pred_full_reg_offset(s, a->rd);
4453 do_ldr(s, off, size, a->rn, a->imm * size);
4454 }
4455 return true;
4456}
c4e7c493 4457
3a7be554 4458static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4459{
4460 if (sve_access_check(s)) {
4461 int size = vec_full_reg_size(s);
4462 int off = vec_full_reg_offset(s, a->rd);
4463 do_str(s, off, size, a->rn, a->imm * size);
4464 }
4465 return true;
4466}
4467
3a7be554 4468static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4469{
4470 if (sve_access_check(s)) {
4471 int size = pred_full_reg_size(s);
4472 int off = pred_full_reg_offset(s, a->rd);
4473 do_str(s, off, size, a->rn, a->imm * size);
4474 }
4475 return true;
4476}
4477
c4e7c493
RH
4478/*
4479 *** SVE Memory - Contiguous Load Group
4480 */
4481
4482/* The memory mode of the dtype. */
14776ab5 4483static const MemOp dtype_mop[16] = {
c4e7c493
RH
4484 MO_UB, MO_UB, MO_UB, MO_UB,
4485 MO_SL, MO_UW, MO_UW, MO_UW,
4486 MO_SW, MO_SW, MO_UL, MO_UL,
4487 MO_SB, MO_SB, MO_SB, MO_Q
4488};
4489
4490#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4491
4492/* The vector element size of dtype. */
/*
 * log2 of the vector element size, in bytes, for each of the 16 dtype
 * values (0 = byte ... 3 = doubleword).
 */
static const uint8_t dtype_esz[16] = {
    [0]  = 0, [1]  = 1, [2]  = 2, [3]  = 3,
    [4]  = 3, [5]  = 1, [6]  = 2, [7]  = 3,
    [8]  = 3, [9]  = 2, [10] = 2, [11] = 3,
    [12] = 3, [13] = 2, [14] = 1, [15] = 3,
};
4499
4500static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4501 int dtype, uint32_t mte_n, bool is_write,
4502 gen_helper_gvec_mem *fn)
c4e7c493
RH
4503{
4504 unsigned vsz = vec_full_reg_size(s);
4505 TCGv_ptr t_pg;
500d0484 4506 TCGv_i32 t_desc;
206adacf 4507 int desc = 0;
c4e7c493 4508
206adacf
RH
4509 /*
4510 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4511 * registers as pointers, so encode the regno into the data field.
4512 * For consistency, do this even for LD1.
4513 */
9473d0ec 4514 if (s->mte_active[0]) {
206adacf
RH
4515 int msz = dtype_msz(dtype);
4516
4517 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4518 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4519 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4520 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4521 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4522 desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4523 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4524 } else {
4525 addr = clean_data_tbi(s, addr);
206adacf 4526 }
9473d0ec 4527
206adacf 4528 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 4529 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4530 t_pg = tcg_temp_new_ptr();
4531
4532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4533 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4534
4535 tcg_temp_free_ptr(t_pg);
500d0484 4536 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4537}
4538
4539static void do_ld_zpa(DisasContext *s, int zt, int pg,
4540 TCGv_i64 addr, int dtype, int nreg)
4541{
206adacf
RH
4542 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4543 { /* mte inactive, little-endian */
4544 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4545 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4546 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4547 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4548 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4549
4550 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4551 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4552 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4553 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4554 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4555
4556 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4557 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4558 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4559 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4560 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4561
4562 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4564 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4565 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4566 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4567
4568 /* mte inactive, big-endian */
4569 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4570 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4571 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4572 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4573 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4574
4575 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4577 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4578 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4579 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4580
4581 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4582 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4583 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4584 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4585 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4586
4587 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4588 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4589 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4590 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4591 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4592
4593 { /* mte active, little-endian */
4594 { { gen_helper_sve_ld1bb_r_mte,
4595 gen_helper_sve_ld2bb_r_mte,
4596 gen_helper_sve_ld3bb_r_mte,
4597 gen_helper_sve_ld4bb_r_mte },
4598 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4599 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4600 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4601
4602 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4603 { gen_helper_sve_ld1hh_le_r_mte,
4604 gen_helper_sve_ld2hh_le_r_mte,
4605 gen_helper_sve_ld3hh_le_r_mte,
4606 gen_helper_sve_ld4hh_le_r_mte },
4607 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4608 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4609
4610 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4611 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4612 { gen_helper_sve_ld1ss_le_r_mte,
4613 gen_helper_sve_ld2ss_le_r_mte,
4614 gen_helper_sve_ld3ss_le_r_mte,
4615 gen_helper_sve_ld4ss_le_r_mte },
4616 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4617
4618 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4619 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4620 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4621 { gen_helper_sve_ld1dd_le_r_mte,
4622 gen_helper_sve_ld2dd_le_r_mte,
4623 gen_helper_sve_ld3dd_le_r_mte,
4624 gen_helper_sve_ld4dd_le_r_mte } },
4625
4626 /* mte active, big-endian */
4627 { { gen_helper_sve_ld1bb_r_mte,
4628 gen_helper_sve_ld2bb_r_mte,
4629 gen_helper_sve_ld3bb_r_mte,
4630 gen_helper_sve_ld4bb_r_mte },
4631 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4633 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4634
4635 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4636 { gen_helper_sve_ld1hh_be_r_mte,
4637 gen_helper_sve_ld2hh_be_r_mte,
4638 gen_helper_sve_ld3hh_be_r_mte,
4639 gen_helper_sve_ld4hh_be_r_mte },
4640 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4641 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4642
4643 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4644 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4645 { gen_helper_sve_ld1ss_be_r_mte,
4646 gen_helper_sve_ld2ss_be_r_mte,
4647 gen_helper_sve_ld3ss_be_r_mte,
4648 gen_helper_sve_ld4ss_be_r_mte },
4649 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4650
4651 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4652 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4653 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4654 { gen_helper_sve_ld1dd_be_r_mte,
4655 gen_helper_sve_ld2dd_be_r_mte,
4656 gen_helper_sve_ld3dd_be_r_mte,
4657 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4658 };
206adacf
RH
4659 gen_helper_gvec_mem *fn
4660 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4661
206adacf
RH
4662 /*
4663 * While there are holes in the table, they are not
c4e7c493
RH
4664 * accessible via the instruction encoding.
4665 */
4666 assert(fn != NULL);
206adacf 4667 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4668}
4669
3a7be554 4670static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4671{
4672 if (a->rm == 31) {
4673 return false;
4674 }
4675 if (sve_access_check(s)) {
4676 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4677 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4678 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4679 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4680 }
4681 return true;
4682}
4683
3a7be554 4684static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4685{
4686 if (sve_access_check(s)) {
4687 int vsz = vec_full_reg_size(s);
4688 int elements = vsz >> dtype_esz[a->dtype];
4689 TCGv_i64 addr = new_tmp_a64(s);
4690
4691 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4692 (a->imm * elements * (a->nreg + 1))
4693 << dtype_msz(a->dtype));
4694 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4695 }
4696 return true;
4697}
e2654d75 4698
3a7be554 4699static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4700{
aa13f7c3
RH
4701 static gen_helper_gvec_mem * const fns[2][2][16] = {
4702 { /* mte inactive, little-endian */
4703 { gen_helper_sve_ldff1bb_r,
4704 gen_helper_sve_ldff1bhu_r,
4705 gen_helper_sve_ldff1bsu_r,
4706 gen_helper_sve_ldff1bdu_r,
4707
4708 gen_helper_sve_ldff1sds_le_r,
4709 gen_helper_sve_ldff1hh_le_r,
4710 gen_helper_sve_ldff1hsu_le_r,
4711 gen_helper_sve_ldff1hdu_le_r,
4712
4713 gen_helper_sve_ldff1hds_le_r,
4714 gen_helper_sve_ldff1hss_le_r,
4715 gen_helper_sve_ldff1ss_le_r,
4716 gen_helper_sve_ldff1sdu_le_r,
4717
4718 gen_helper_sve_ldff1bds_r,
4719 gen_helper_sve_ldff1bss_r,
4720 gen_helper_sve_ldff1bhs_r,
4721 gen_helper_sve_ldff1dd_le_r },
4722
4723 /* mte inactive, big-endian */
4724 { gen_helper_sve_ldff1bb_r,
4725 gen_helper_sve_ldff1bhu_r,
4726 gen_helper_sve_ldff1bsu_r,
4727 gen_helper_sve_ldff1bdu_r,
4728
4729 gen_helper_sve_ldff1sds_be_r,
4730 gen_helper_sve_ldff1hh_be_r,
4731 gen_helper_sve_ldff1hsu_be_r,
4732 gen_helper_sve_ldff1hdu_be_r,
4733
4734 gen_helper_sve_ldff1hds_be_r,
4735 gen_helper_sve_ldff1hss_be_r,
4736 gen_helper_sve_ldff1ss_be_r,
4737 gen_helper_sve_ldff1sdu_be_r,
4738
4739 gen_helper_sve_ldff1bds_r,
4740 gen_helper_sve_ldff1bss_r,
4741 gen_helper_sve_ldff1bhs_r,
4742 gen_helper_sve_ldff1dd_be_r } },
4743
4744 { /* mte active, little-endian */
4745 { gen_helper_sve_ldff1bb_r_mte,
4746 gen_helper_sve_ldff1bhu_r_mte,
4747 gen_helper_sve_ldff1bsu_r_mte,
4748 gen_helper_sve_ldff1bdu_r_mte,
4749
4750 gen_helper_sve_ldff1sds_le_r_mte,
4751 gen_helper_sve_ldff1hh_le_r_mte,
4752 gen_helper_sve_ldff1hsu_le_r_mte,
4753 gen_helper_sve_ldff1hdu_le_r_mte,
4754
4755 gen_helper_sve_ldff1hds_le_r_mte,
4756 gen_helper_sve_ldff1hss_le_r_mte,
4757 gen_helper_sve_ldff1ss_le_r_mte,
4758 gen_helper_sve_ldff1sdu_le_r_mte,
4759
4760 gen_helper_sve_ldff1bds_r_mte,
4761 gen_helper_sve_ldff1bss_r_mte,
4762 gen_helper_sve_ldff1bhs_r_mte,
4763 gen_helper_sve_ldff1dd_le_r_mte },
4764
4765 /* mte active, big-endian */
4766 { gen_helper_sve_ldff1bb_r_mte,
4767 gen_helper_sve_ldff1bhu_r_mte,
4768 gen_helper_sve_ldff1bsu_r_mte,
4769 gen_helper_sve_ldff1bdu_r_mte,
4770
4771 gen_helper_sve_ldff1sds_be_r_mte,
4772 gen_helper_sve_ldff1hh_be_r_mte,
4773 gen_helper_sve_ldff1hsu_be_r_mte,
4774 gen_helper_sve_ldff1hdu_be_r_mte,
4775
4776 gen_helper_sve_ldff1hds_be_r_mte,
4777 gen_helper_sve_ldff1hss_be_r_mte,
4778 gen_helper_sve_ldff1ss_be_r_mte,
4779 gen_helper_sve_ldff1sdu_be_r_mte,
4780
4781 gen_helper_sve_ldff1bds_r_mte,
4782 gen_helper_sve_ldff1bss_r_mte,
4783 gen_helper_sve_ldff1bhs_r_mte,
4784 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4785 };
4786
4787 if (sve_access_check(s)) {
4788 TCGv_i64 addr = new_tmp_a64(s);
4789 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4790 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4791 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4792 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4793 }
4794 return true;
4795}
4796
3a7be554 4797static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4798{
aa13f7c3
RH
4799 static gen_helper_gvec_mem * const fns[2][2][16] = {
4800 { /* mte inactive, little-endian */
4801 { gen_helper_sve_ldnf1bb_r,
4802 gen_helper_sve_ldnf1bhu_r,
4803 gen_helper_sve_ldnf1bsu_r,
4804 gen_helper_sve_ldnf1bdu_r,
4805
4806 gen_helper_sve_ldnf1sds_le_r,
4807 gen_helper_sve_ldnf1hh_le_r,
4808 gen_helper_sve_ldnf1hsu_le_r,
4809 gen_helper_sve_ldnf1hdu_le_r,
4810
4811 gen_helper_sve_ldnf1hds_le_r,
4812 gen_helper_sve_ldnf1hss_le_r,
4813 gen_helper_sve_ldnf1ss_le_r,
4814 gen_helper_sve_ldnf1sdu_le_r,
4815
4816 gen_helper_sve_ldnf1bds_r,
4817 gen_helper_sve_ldnf1bss_r,
4818 gen_helper_sve_ldnf1bhs_r,
4819 gen_helper_sve_ldnf1dd_le_r },
4820
4821 /* mte inactive, big-endian */
4822 { gen_helper_sve_ldnf1bb_r,
4823 gen_helper_sve_ldnf1bhu_r,
4824 gen_helper_sve_ldnf1bsu_r,
4825 gen_helper_sve_ldnf1bdu_r,
4826
4827 gen_helper_sve_ldnf1sds_be_r,
4828 gen_helper_sve_ldnf1hh_be_r,
4829 gen_helper_sve_ldnf1hsu_be_r,
4830 gen_helper_sve_ldnf1hdu_be_r,
4831
4832 gen_helper_sve_ldnf1hds_be_r,
4833 gen_helper_sve_ldnf1hss_be_r,
4834 gen_helper_sve_ldnf1ss_be_r,
4835 gen_helper_sve_ldnf1sdu_be_r,
4836
4837 gen_helper_sve_ldnf1bds_r,
4838 gen_helper_sve_ldnf1bss_r,
4839 gen_helper_sve_ldnf1bhs_r,
4840 gen_helper_sve_ldnf1dd_be_r } },
4841
4842 { /* mte inactive, little-endian */
4843 { gen_helper_sve_ldnf1bb_r_mte,
4844 gen_helper_sve_ldnf1bhu_r_mte,
4845 gen_helper_sve_ldnf1bsu_r_mte,
4846 gen_helper_sve_ldnf1bdu_r_mte,
4847
4848 gen_helper_sve_ldnf1sds_le_r_mte,
4849 gen_helper_sve_ldnf1hh_le_r_mte,
4850 gen_helper_sve_ldnf1hsu_le_r_mte,
4851 gen_helper_sve_ldnf1hdu_le_r_mte,
4852
4853 gen_helper_sve_ldnf1hds_le_r_mte,
4854 gen_helper_sve_ldnf1hss_le_r_mte,
4855 gen_helper_sve_ldnf1ss_le_r_mte,
4856 gen_helper_sve_ldnf1sdu_le_r_mte,
4857
4858 gen_helper_sve_ldnf1bds_r_mte,
4859 gen_helper_sve_ldnf1bss_r_mte,
4860 gen_helper_sve_ldnf1bhs_r_mte,
4861 gen_helper_sve_ldnf1dd_le_r_mte },
4862
4863 /* mte inactive, big-endian */
4864 { gen_helper_sve_ldnf1bb_r_mte,
4865 gen_helper_sve_ldnf1bhu_r_mte,
4866 gen_helper_sve_ldnf1bsu_r_mte,
4867 gen_helper_sve_ldnf1bdu_r_mte,
4868
4869 gen_helper_sve_ldnf1sds_be_r_mte,
4870 gen_helper_sve_ldnf1hh_be_r_mte,
4871 gen_helper_sve_ldnf1hsu_be_r_mte,
4872 gen_helper_sve_ldnf1hdu_be_r_mte,
4873
4874 gen_helper_sve_ldnf1hds_be_r_mte,
4875 gen_helper_sve_ldnf1hss_be_r_mte,
4876 gen_helper_sve_ldnf1ss_be_r_mte,
4877 gen_helper_sve_ldnf1sdu_be_r_mte,
4878
4879 gen_helper_sve_ldnf1bds_r_mte,
4880 gen_helper_sve_ldnf1bss_r_mte,
4881 gen_helper_sve_ldnf1bhs_r_mte,
4882 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4883 };
4884
4885 if (sve_access_check(s)) {
4886 int vsz = vec_full_reg_size(s);
4887 int elements = vsz >> dtype_esz[a->dtype];
4888 int off = (a->imm * elements) << dtype_msz(a->dtype);
4889 TCGv_i64 addr = new_tmp_a64(s);
4890
4891 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4892 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4893 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4894 }
4895 return true;
4896}
1a039c7e 4897
05abe304
RH
4898static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4899{
7d0a57a2
RH
4900 static gen_helper_gvec_mem * const fns[2][4] = {
4901 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4902 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4903 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4904 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
4905 };
4906 unsigned vsz = vec_full_reg_size(s);
4907 TCGv_ptr t_pg;
500d0484
RH
4908 TCGv_i32 t_desc;
4909 int desc, poff;
05abe304
RH
4910
4911 /* Load the first quadword using the normal predicated load helpers. */
ba080b86 4912 desc = simd_desc(16, 16, zt);
500d0484 4913 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
4914
4915 poff = pred_full_reg_offset(s, pg);
4916 if (vsz > 16) {
4917 /*
4918 * Zero-extend the first 16 bits of the predicate into a temporary.
4919 * This avoids triggering an assert making sure we don't have bits
4920 * set within a predicate beyond VQ, but we have lowered VQ to 1
4921 * for this load operation.
4922 */
4923 TCGv_i64 tmp = tcg_temp_new_i64();
4924#ifdef HOST_WORDS_BIGENDIAN
4925 poff += 6;
4926#endif
4927 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4928
4929 poff = offsetof(CPUARMState, vfp.preg_tmp);
4930 tcg_gen_st_i64(tmp, cpu_env, poff);
4931 tcg_temp_free_i64(tmp);
4932 }
4933
05abe304 4934 t_pg = tcg_temp_new_ptr();
2a99ab2b 4935 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4936
500d0484 4937 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
4938
4939 tcg_temp_free_ptr(t_pg);
500d0484 4940 tcg_temp_free_i32(t_desc);
05abe304
RH
4941
4942 /* Replicate that first quadword. */
4943 if (vsz > 16) {
4944 unsigned dofs = vec_full_reg_offset(s, zt);
4945 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4946 }
4947}
4948
3a7be554 4949static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4950{
4951 if (a->rm == 31) {
4952 return false;
4953 }
4954 if (sve_access_check(s)) {
4955 int msz = dtype_msz(a->dtype);
4956 TCGv_i64 addr = new_tmp_a64(s);
4957 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4958 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4959 do_ldrq(s, a->rd, a->pg, addr, msz);
4960 }
4961 return true;
4962}
4963
3a7be554 4964static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4965{
4966 if (sve_access_check(s)) {
4967 TCGv_i64 addr = new_tmp_a64(s);
4968 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4969 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4970 }
4971 return true;
4972}
4973
68459864 4974/* Load and broadcast element. */
3a7be554 4975static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 4976{
68459864
RH
4977 unsigned vsz = vec_full_reg_size(s);
4978 unsigned psz = pred_full_reg_size(s);
4979 unsigned esz = dtype_esz[a->dtype];
d0e372b0 4980 unsigned msz = dtype_msz(a->dtype);
c0ed9166 4981 TCGLabel *over;
4ac430e1 4982 TCGv_i64 temp, clean_addr;
68459864 4983
c0ed9166
RH
4984 if (!sve_access_check(s)) {
4985 return true;
4986 }
4987
4988 over = gen_new_label();
4989
68459864
RH
4990 /* If the guarding predicate has no bits set, no load occurs. */
4991 if (psz <= 8) {
4992 /* Reduce the pred_esz_masks value simply to reduce the
4993 * size of the code generated here.
4994 */
4995 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4996 temp = tcg_temp_new_i64();
4997 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4998 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4999 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5000 tcg_temp_free_i64(temp);
5001 } else {
5002 TCGv_i32 t32 = tcg_temp_new_i32();
5003 find_last_active(s, t32, esz, a->pg);
5004 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5005 tcg_temp_free_i32(t32);
5006 }
5007
5008 /* Load the data. */
5009 temp = tcg_temp_new_i64();
d0e372b0 5010 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5011 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5012
5013 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
68459864
RH
5014 s->be_data | dtype_mop[a->dtype]);
5015
5016 /* Broadcast to *all* elements. */
5017 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5018 vsz, vsz, temp);
5019 tcg_temp_free_i64(temp);
5020
5021 /* Zero the inactive elements. */
5022 gen_set_label(over);
60245996 5023 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5024}
5025
1a039c7e
RH
5026static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5027 int msz, int esz, int nreg)
5028{
71b9f394
RH
5029 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5030 { { { gen_helper_sve_st1bb_r,
5031 gen_helper_sve_st1bh_r,
5032 gen_helper_sve_st1bs_r,
5033 gen_helper_sve_st1bd_r },
5034 { NULL,
5035 gen_helper_sve_st1hh_le_r,
5036 gen_helper_sve_st1hs_le_r,
5037 gen_helper_sve_st1hd_le_r },
5038 { NULL, NULL,
5039 gen_helper_sve_st1ss_le_r,
5040 gen_helper_sve_st1sd_le_r },
5041 { NULL, NULL, NULL,
5042 gen_helper_sve_st1dd_le_r } },
5043 { { gen_helper_sve_st1bb_r,
5044 gen_helper_sve_st1bh_r,
5045 gen_helper_sve_st1bs_r,
5046 gen_helper_sve_st1bd_r },
5047 { NULL,
5048 gen_helper_sve_st1hh_be_r,
5049 gen_helper_sve_st1hs_be_r,
5050 gen_helper_sve_st1hd_be_r },
5051 { NULL, NULL,
5052 gen_helper_sve_st1ss_be_r,
5053 gen_helper_sve_st1sd_be_r },
5054 { NULL, NULL, NULL,
5055 gen_helper_sve_st1dd_be_r } } },
5056
5057 { { { gen_helper_sve_st1bb_r_mte,
5058 gen_helper_sve_st1bh_r_mte,
5059 gen_helper_sve_st1bs_r_mte,
5060 gen_helper_sve_st1bd_r_mte },
5061 { NULL,
5062 gen_helper_sve_st1hh_le_r_mte,
5063 gen_helper_sve_st1hs_le_r_mte,
5064 gen_helper_sve_st1hd_le_r_mte },
5065 { NULL, NULL,
5066 gen_helper_sve_st1ss_le_r_mte,
5067 gen_helper_sve_st1sd_le_r_mte },
5068 { NULL, NULL, NULL,
5069 gen_helper_sve_st1dd_le_r_mte } },
5070 { { gen_helper_sve_st1bb_r_mte,
5071 gen_helper_sve_st1bh_r_mte,
5072 gen_helper_sve_st1bs_r_mte,
5073 gen_helper_sve_st1bd_r_mte },
5074 { NULL,
5075 gen_helper_sve_st1hh_be_r_mte,
5076 gen_helper_sve_st1hs_be_r_mte,
5077 gen_helper_sve_st1hd_be_r_mte },
5078 { NULL, NULL,
5079 gen_helper_sve_st1ss_be_r_mte,
5080 gen_helper_sve_st1sd_be_r_mte },
5081 { NULL, NULL, NULL,
5082 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5083 };
71b9f394
RH
5084 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5085 { { { gen_helper_sve_st2bb_r,
5086 gen_helper_sve_st2hh_le_r,
5087 gen_helper_sve_st2ss_le_r,
5088 gen_helper_sve_st2dd_le_r },
5089 { gen_helper_sve_st3bb_r,
5090 gen_helper_sve_st3hh_le_r,
5091 gen_helper_sve_st3ss_le_r,
5092 gen_helper_sve_st3dd_le_r },
5093 { gen_helper_sve_st4bb_r,
5094 gen_helper_sve_st4hh_le_r,
5095 gen_helper_sve_st4ss_le_r,
5096 gen_helper_sve_st4dd_le_r } },
5097 { { gen_helper_sve_st2bb_r,
5098 gen_helper_sve_st2hh_be_r,
5099 gen_helper_sve_st2ss_be_r,
5100 gen_helper_sve_st2dd_be_r },
5101 { gen_helper_sve_st3bb_r,
5102 gen_helper_sve_st3hh_be_r,
5103 gen_helper_sve_st3ss_be_r,
5104 gen_helper_sve_st3dd_be_r },
5105 { gen_helper_sve_st4bb_r,
5106 gen_helper_sve_st4hh_be_r,
5107 gen_helper_sve_st4ss_be_r,
5108 gen_helper_sve_st4dd_be_r } } },
5109 { { { gen_helper_sve_st2bb_r_mte,
5110 gen_helper_sve_st2hh_le_r_mte,
5111 gen_helper_sve_st2ss_le_r_mte,
5112 gen_helper_sve_st2dd_le_r_mte },
5113 { gen_helper_sve_st3bb_r_mte,
5114 gen_helper_sve_st3hh_le_r_mte,
5115 gen_helper_sve_st3ss_le_r_mte,
5116 gen_helper_sve_st3dd_le_r_mte },
5117 { gen_helper_sve_st4bb_r_mte,
5118 gen_helper_sve_st4hh_le_r_mte,
5119 gen_helper_sve_st4ss_le_r_mte,
5120 gen_helper_sve_st4dd_le_r_mte } },
5121 { { gen_helper_sve_st2bb_r_mte,
5122 gen_helper_sve_st2hh_be_r_mte,
5123 gen_helper_sve_st2ss_be_r_mte,
5124 gen_helper_sve_st2dd_be_r_mte },
5125 { gen_helper_sve_st3bb_r_mte,
5126 gen_helper_sve_st3hh_be_r_mte,
5127 gen_helper_sve_st3ss_be_r_mte,
5128 gen_helper_sve_st3dd_be_r_mte },
5129 { gen_helper_sve_st4bb_r_mte,
5130 gen_helper_sve_st4hh_be_r_mte,
5131 gen_helper_sve_st4ss_be_r_mte,
5132 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5133 };
5134 gen_helper_gvec_mem *fn;
28d57f2d 5135 int be = s->be_data == MO_BE;
1a039c7e
RH
5136
5137 if (nreg == 0) {
5138 /* ST1 */
71b9f394
RH
5139 fn = fn_single[s->mte_active[0]][be][msz][esz];
5140 nreg = 1;
1a039c7e
RH
5141 } else {
5142 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5143 assert(msz == esz);
71b9f394 5144 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5145 }
5146 assert(fn != NULL);
71b9f394 5147 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5148}
5149
3a7be554 5150static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5151{
5152 if (a->rm == 31 || a->msz > a->esz) {
5153 return false;
5154 }
5155 if (sve_access_check(s)) {
5156 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5157 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5158 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5159 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5160 }
5161 return true;
5162}
5163
3a7be554 5164static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5165{
5166 if (a->msz > a->esz) {
5167 return false;
5168 }
5169 if (sve_access_check(s)) {
5170 int vsz = vec_full_reg_size(s);
5171 int elements = vsz >> a->esz;
5172 TCGv_i64 addr = new_tmp_a64(s);
5173
5174 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5175 (a->imm * elements * (a->nreg + 1)) << a->msz);
5176 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5177 }
5178 return true;
5179}
f6dbf62a
RH
5180
5181/*
5182 *** SVE gather loads / scatter stores
5183 */
5184
500d0484 5185static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5186 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5187 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5188{
5189 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5190 TCGv_ptr t_zm = tcg_temp_new_ptr();
5191 TCGv_ptr t_pg = tcg_temp_new_ptr();
5192 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5193 TCGv_i32 t_desc;
d28d12f0 5194 int desc = 0;
500d0484 5195
d28d12f0
RH
5196 if (s->mte_active[0]) {
5197 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5198 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5199 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5200 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5201 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5202 desc <<= SVE_MTEDESC_SHIFT;
5203 }
cdecb3fc 5204 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 5205 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5206
5207 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5208 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5209 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5210 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5211
5212 tcg_temp_free_ptr(t_zt);
5213 tcg_temp_free_ptr(t_zm);
5214 tcg_temp_free_ptr(t_pg);
500d0484 5215 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5216}
5217
d28d12f0
RH
5218/*
 * Gather-load helpers for 32-bit vector elements.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte: 0 selects the plain helpers, 1 the "_mte" variants.
 *   be:  0 selects the "_le" helpers, 1 the "_be" helpers
 *        (the byte-sized helpers have no endian variant).
 *   ff:  0 selects the "ld" helpers, 1 the first-fault "ldff" helpers.
 *   xs:  0 selects the "_zsu" helpers, 1 the "_zss" helpers
 *        (presumably unsigned vs signed extension of the 32-bit offset).
 *   u:   0 selects the "ss" helpers, 1 the "su"/full-width helpers
 *        (presumably sign- vs zero-extension of the loaded value).
 *   msz: 0..2, selecting the byte / half / word helpers.
 * The [u = 0][msz = 2] slots are NULL: no helper is listed for them.
 * The lookups in this file assert that the selected entry is non-NULL.
 */
5219static gen_helper_gvec_mem_scatter * const
5220gather_load_fn32[2][2][2][2][2][3] = {
5221 { /* MTE Inactive */
5222 { /* Little-endian */
5223 { { { gen_helper_sve_ldbss_zsu,
5224 gen_helper_sve_ldhss_le_zsu,
5225 NULL, },
5226 { gen_helper_sve_ldbsu_zsu,
5227 gen_helper_sve_ldhsu_le_zsu,
5228 gen_helper_sve_ldss_le_zsu, } },
5229 { { gen_helper_sve_ldbss_zss,
5230 gen_helper_sve_ldhss_le_zss,
5231 NULL, },
5232 { gen_helper_sve_ldbsu_zss,
5233 gen_helper_sve_ldhsu_le_zss,
5234 gen_helper_sve_ldss_le_zss, } } },
5235
5236 /* First-fault */
5237 { { { gen_helper_sve_ldffbss_zsu,
5238 gen_helper_sve_ldffhss_le_zsu,
5239 NULL, },
5240 { gen_helper_sve_ldffbsu_zsu,
5241 gen_helper_sve_ldffhsu_le_zsu,
5242 gen_helper_sve_ldffss_le_zsu, } },
5243 { { gen_helper_sve_ldffbss_zss,
5244 gen_helper_sve_ldffhss_le_zss,
5245 NULL, },
5246 { gen_helper_sve_ldffbsu_zss,
5247 gen_helper_sve_ldffhsu_le_zss,
5248 gen_helper_sve_ldffss_le_zss, } } } },
5249
5250 { /* Big-endian */
5251 { { { gen_helper_sve_ldbss_zsu,
5252 gen_helper_sve_ldhss_be_zsu,
5253 NULL, },
5254 { gen_helper_sve_ldbsu_zsu,
5255 gen_helper_sve_ldhsu_be_zsu,
5256 gen_helper_sve_ldss_be_zsu, } },
5257 { { gen_helper_sve_ldbss_zss,
5258 gen_helper_sve_ldhss_be_zss,
5259 NULL, },
5260 { gen_helper_sve_ldbsu_zss,
5261 gen_helper_sve_ldhsu_be_zss,
5262 gen_helper_sve_ldss_be_zss, } } },
5263
5264 /* First-fault */
5265 { { { gen_helper_sve_ldffbss_zsu,
5266 gen_helper_sve_ldffhss_be_zsu,
5267 NULL, },
5268 { gen_helper_sve_ldffbsu_zsu,
5269 gen_helper_sve_ldffhsu_be_zsu,
5270 gen_helper_sve_ldffss_be_zsu, } },
5271 { { gen_helper_sve_ldffbss_zss,
5272 gen_helper_sve_ldffhss_be_zss,
5273 NULL, },
5274 { gen_helper_sve_ldffbsu_zss,
5275 gen_helper_sve_ldffhsu_be_zss,
5276 gen_helper_sve_ldffss_be_zss, } } } } },
5277 { /* MTE Active */
5278 { /* Little-endian */
5279 { { { gen_helper_sve_ldbss_zsu_mte,
5280 gen_helper_sve_ldhss_le_zsu_mte,
5281 NULL, },
5282 { gen_helper_sve_ldbsu_zsu_mte,
5283 gen_helper_sve_ldhsu_le_zsu_mte,
5284 gen_helper_sve_ldss_le_zsu_mte, } },
5285 { { gen_helper_sve_ldbss_zss_mte,
5286 gen_helper_sve_ldhss_le_zss_mte,
5287 NULL, },
5288 { gen_helper_sve_ldbsu_zss_mte,
5289 gen_helper_sve_ldhsu_le_zss_mte,
5290 gen_helper_sve_ldss_le_zss_mte, } } },
5291
5292 /* First-fault */
5293 { { { gen_helper_sve_ldffbss_zsu_mte,
5294 gen_helper_sve_ldffhss_le_zsu_mte,
5295 NULL, },
5296 { gen_helper_sve_ldffbsu_zsu_mte,
5297 gen_helper_sve_ldffhsu_le_zsu_mte,
5298 gen_helper_sve_ldffss_le_zsu_mte, } },
5299 { { gen_helper_sve_ldffbss_zss_mte,
5300 gen_helper_sve_ldffhss_le_zss_mte,
5301 NULL, },
5302 { gen_helper_sve_ldffbsu_zss_mte,
5303 gen_helper_sve_ldffhsu_le_zss_mte,
5304 gen_helper_sve_ldffss_le_zss_mte, } } } },
5305
5306 { /* Big-endian */
5307 { { { gen_helper_sve_ldbss_zsu_mte,
5308 gen_helper_sve_ldhss_be_zsu_mte,
5309 NULL, },
5310 { gen_helper_sve_ldbsu_zsu_mte,
5311 gen_helper_sve_ldhsu_be_zsu_mte,
5312 gen_helper_sve_ldss_be_zsu_mte, } },
5313 { { gen_helper_sve_ldbss_zss_mte,
5314 gen_helper_sve_ldhss_be_zss_mte,
5315 NULL, },
5316 { gen_helper_sve_ldbsu_zss_mte,
5317 gen_helper_sve_ldhsu_be_zss_mte,
5318 gen_helper_sve_ldss_be_zss_mte, } } },
5319
5320 /* First-fault */
5321 { { { gen_helper_sve_ldffbss_zsu_mte,
5322 gen_helper_sve_ldffhss_be_zsu_mte,
5323 NULL, },
5324 { gen_helper_sve_ldffbsu_zsu_mte,
5325 gen_helper_sve_ldffhsu_be_zsu_mte,
5326 gen_helper_sve_ldffss_be_zsu_mte, } },
5327 { { gen_helper_sve_ldffbss_zss_mte,
5328 gen_helper_sve_ldffhss_be_zss_mte,
5329 NULL, },
5330 { gen_helper_sve_ldffbsu_zss_mte,
5331 gen_helper_sve_ldffhsu_be_zss_mte,
5332 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5333};
5334
5335/*
 * Gather-load helpers for 64-bit vector elements.
 * Indexed by [mte][be][ff][xs][u][msz], in the same order as
 * gather_load_fn32, except that msz ranges over 0..3 and xs over 0..2.
 * Note that we overload xs=2 to indicate 64-bit offset: that slot holds
 * the "_zd" helpers, while xs=0 and xs=1 hold "_zsu" and "_zss".
 * The [u = 0][msz = 3] slots are NULL: no helper is listed for them.
 */
d28d12f0
RH
5336static gen_helper_gvec_mem_scatter * const
5337gather_load_fn64[2][2][2][3][2][4] = {
5338 { /* MTE Inactive */
5339 { /* Little-endian */
5340 { { { gen_helper_sve_ldbds_zsu,
5341 gen_helper_sve_ldhds_le_zsu,
5342 gen_helper_sve_ldsds_le_zsu,
5343 NULL, },
5344 { gen_helper_sve_ldbdu_zsu,
5345 gen_helper_sve_ldhdu_le_zsu,
5346 gen_helper_sve_ldsdu_le_zsu,
5347 gen_helper_sve_lddd_le_zsu, } },
5348 { { gen_helper_sve_ldbds_zss,
5349 gen_helper_sve_ldhds_le_zss,
5350 gen_helper_sve_ldsds_le_zss,
5351 NULL, },
5352 { gen_helper_sve_ldbdu_zss,
5353 gen_helper_sve_ldhdu_le_zss,
5354 gen_helper_sve_ldsdu_le_zss,
5355 gen_helper_sve_lddd_le_zss, } },
5356 { { gen_helper_sve_ldbds_zd,
5357 gen_helper_sve_ldhds_le_zd,
5358 gen_helper_sve_ldsds_le_zd,
5359 NULL, },
5360 { gen_helper_sve_ldbdu_zd,
5361 gen_helper_sve_ldhdu_le_zd,
5362 gen_helper_sve_ldsdu_le_zd,
5363 gen_helper_sve_lddd_le_zd, } } },
5364
5365 /* First-fault */
5366 { { { gen_helper_sve_ldffbds_zsu,
5367 gen_helper_sve_ldffhds_le_zsu,
5368 gen_helper_sve_ldffsds_le_zsu,
5369 NULL, },
5370 { gen_helper_sve_ldffbdu_zsu,
5371 gen_helper_sve_ldffhdu_le_zsu,
5372 gen_helper_sve_ldffsdu_le_zsu,
5373 gen_helper_sve_ldffdd_le_zsu, } },
5374 { { gen_helper_sve_ldffbds_zss,
5375 gen_helper_sve_ldffhds_le_zss,
5376 gen_helper_sve_ldffsds_le_zss,
5377 NULL, },
5378 { gen_helper_sve_ldffbdu_zss,
5379 gen_helper_sve_ldffhdu_le_zss,
5380 gen_helper_sve_ldffsdu_le_zss,
5381 gen_helper_sve_ldffdd_le_zss, } },
5382 { { gen_helper_sve_ldffbds_zd,
5383 gen_helper_sve_ldffhds_le_zd,
5384 gen_helper_sve_ldffsds_le_zd,
5385 NULL, },
5386 { gen_helper_sve_ldffbdu_zd,
5387 gen_helper_sve_ldffhdu_le_zd,
5388 gen_helper_sve_ldffsdu_le_zd,
5389 gen_helper_sve_ldffdd_le_zd, } } } },
5390 { /* Big-endian */
5391 { { { gen_helper_sve_ldbds_zsu,
5392 gen_helper_sve_ldhds_be_zsu,
5393 gen_helper_sve_ldsds_be_zsu,
5394 NULL, },
5395 { gen_helper_sve_ldbdu_zsu,
5396 gen_helper_sve_ldhdu_be_zsu,
5397 gen_helper_sve_ldsdu_be_zsu,
5398 gen_helper_sve_lddd_be_zsu, } },
5399 { { gen_helper_sve_ldbds_zss,
5400 gen_helper_sve_ldhds_be_zss,
5401 gen_helper_sve_ldsds_be_zss,
5402 NULL, },
5403 { gen_helper_sve_ldbdu_zss,
5404 gen_helper_sve_ldhdu_be_zss,
5405 gen_helper_sve_ldsdu_be_zss,
5406 gen_helper_sve_lddd_be_zss, } },
5407 { { gen_helper_sve_ldbds_zd,
5408 gen_helper_sve_ldhds_be_zd,
5409 gen_helper_sve_ldsds_be_zd,
5410 NULL, },
5411 { gen_helper_sve_ldbdu_zd,
5412 gen_helper_sve_ldhdu_be_zd,
5413 gen_helper_sve_ldsdu_be_zd,
5414 gen_helper_sve_lddd_be_zd, } } },
5415
5416 /* First-fault */
5417 { { { gen_helper_sve_ldffbds_zsu,
5418 gen_helper_sve_ldffhds_be_zsu,
5419 gen_helper_sve_ldffsds_be_zsu,
5420 NULL, },
5421 { gen_helper_sve_ldffbdu_zsu,
5422 gen_helper_sve_ldffhdu_be_zsu,
5423 gen_helper_sve_ldffsdu_be_zsu,
5424 gen_helper_sve_ldffdd_be_zsu, } },
5425 { { gen_helper_sve_ldffbds_zss,
5426 gen_helper_sve_ldffhds_be_zss,
5427 gen_helper_sve_ldffsds_be_zss,
5428 NULL, },
5429 { gen_helper_sve_ldffbdu_zss,
5430 gen_helper_sve_ldffhdu_be_zss,
5431 gen_helper_sve_ldffsdu_be_zss,
5432 gen_helper_sve_ldffdd_be_zss, } },
5433 { { gen_helper_sve_ldffbds_zd,
5434 gen_helper_sve_ldffhds_be_zd,
5435 gen_helper_sve_ldffsds_be_zd,
5436 NULL, },
5437 { gen_helper_sve_ldffbdu_zd,
5438 gen_helper_sve_ldffhdu_be_zd,
5439 gen_helper_sve_ldffsdu_be_zd,
5440 gen_helper_sve_ldffdd_be_zd, } } } } },
5441 { /* MTE Active */
5442 { /* Little-endian */
5443 { { { gen_helper_sve_ldbds_zsu_mte,
5444 gen_helper_sve_ldhds_le_zsu_mte,
5445 gen_helper_sve_ldsds_le_zsu_mte,
5446 NULL, },
5447 { gen_helper_sve_ldbdu_zsu_mte,
5448 gen_helper_sve_ldhdu_le_zsu_mte,
5449 gen_helper_sve_ldsdu_le_zsu_mte,
5450 gen_helper_sve_lddd_le_zsu_mte, } },
5451 { { gen_helper_sve_ldbds_zss_mte,
5452 gen_helper_sve_ldhds_le_zss_mte,
5453 gen_helper_sve_ldsds_le_zss_mte,
5454 NULL, },
5455 { gen_helper_sve_ldbdu_zss_mte,
5456 gen_helper_sve_ldhdu_le_zss_mte,
5457 gen_helper_sve_ldsdu_le_zss_mte,
5458 gen_helper_sve_lddd_le_zss_mte, } },
5459 { { gen_helper_sve_ldbds_zd_mte,
5460 gen_helper_sve_ldhds_le_zd_mte,
5461 gen_helper_sve_ldsds_le_zd_mte,
5462 NULL, },
5463 { gen_helper_sve_ldbdu_zd_mte,
5464 gen_helper_sve_ldhdu_le_zd_mte,
5465 gen_helper_sve_ldsdu_le_zd_mte,
5466 gen_helper_sve_lddd_le_zd_mte, } } },
5467
5468 /* First-fault */
5469 { { { gen_helper_sve_ldffbds_zsu_mte,
5470 gen_helper_sve_ldffhds_le_zsu_mte,
5471 gen_helper_sve_ldffsds_le_zsu_mte,
5472 NULL, },
5473 { gen_helper_sve_ldffbdu_zsu_mte,
5474 gen_helper_sve_ldffhdu_le_zsu_mte,
5475 gen_helper_sve_ldffsdu_le_zsu_mte,
5476 gen_helper_sve_ldffdd_le_zsu_mte, } },
5477 { { gen_helper_sve_ldffbds_zss_mte,
5478 gen_helper_sve_ldffhds_le_zss_mte,
5479 gen_helper_sve_ldffsds_le_zss_mte,
5480 NULL, },
5481 { gen_helper_sve_ldffbdu_zss_mte,
5482 gen_helper_sve_ldffhdu_le_zss_mte,
5483 gen_helper_sve_ldffsdu_le_zss_mte,
5484 gen_helper_sve_ldffdd_le_zss_mte, } },
5485 { { gen_helper_sve_ldffbds_zd_mte,
5486 gen_helper_sve_ldffhds_le_zd_mte,
5487 gen_helper_sve_ldffsds_le_zd_mte,
5488 NULL, },
5489 { gen_helper_sve_ldffbdu_zd_mte,
5490 gen_helper_sve_ldffhdu_le_zd_mte,
5491 gen_helper_sve_ldffsdu_le_zd_mte,
5492 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5493 { /* Big-endian */
5494 { { { gen_helper_sve_ldbds_zsu_mte,
5495 gen_helper_sve_ldhds_be_zsu_mte,
5496 gen_helper_sve_ldsds_be_zsu_mte,
5497 NULL, },
5498 { gen_helper_sve_ldbdu_zsu_mte,
5499 gen_helper_sve_ldhdu_be_zsu_mte,
5500 gen_helper_sve_ldsdu_be_zsu_mte,
5501 gen_helper_sve_lddd_be_zsu_mte, } },
5502 { { gen_helper_sve_ldbds_zss_mte,
5503 gen_helper_sve_ldhds_be_zss_mte,
5504 gen_helper_sve_ldsds_be_zss_mte,
5505 NULL, },
5506 { gen_helper_sve_ldbdu_zss_mte,
5507 gen_helper_sve_ldhdu_be_zss_mte,
5508 gen_helper_sve_ldsdu_be_zss_mte,
5509 gen_helper_sve_lddd_be_zss_mte, } },
5510 { { gen_helper_sve_ldbds_zd_mte,
5511 gen_helper_sve_ldhds_be_zd_mte,
5512 gen_helper_sve_ldsds_be_zd_mte,
5513 NULL, },
5514 { gen_helper_sve_ldbdu_zd_mte,
5515 gen_helper_sve_ldhdu_be_zd_mte,
5516 gen_helper_sve_ldsdu_be_zd_mte,
5517 gen_helper_sve_lddd_be_zd_mte, } } },
5518
5519 /* First-fault */
5520 { { { gen_helper_sve_ldffbds_zsu_mte,
5521 gen_helper_sve_ldffhds_be_zsu_mte,
5522 gen_helper_sve_ldffsds_be_zsu_mte,
5523 NULL, },
5524 { gen_helper_sve_ldffbdu_zsu_mte,
5525 gen_helper_sve_ldffhdu_be_zsu_mte,
5526 gen_helper_sve_ldffsdu_be_zsu_mte,
5527 gen_helper_sve_ldffdd_be_zsu_mte, } },
5528 { { gen_helper_sve_ldffbds_zss_mte,
5529 gen_helper_sve_ldffhds_be_zss_mte,
5530 gen_helper_sve_ldffsds_be_zss_mte,
5531 NULL, },
5532 { gen_helper_sve_ldffbdu_zss_mte,
5533 gen_helper_sve_ldffhdu_be_zss_mte,
5534 gen_helper_sve_ldffsdu_be_zss_mte,
5535 gen_helper_sve_ldffdd_be_zss_mte, } },
5536 { { gen_helper_sve_ldffbds_zd_mte,
5537 gen_helper_sve_ldffhds_be_zd_mte,
5538 gen_helper_sve_ldffsds_be_zd_mte,
5539 NULL, },
5540 { gen_helper_sve_ldffbdu_zd_mte,
5541 gen_helper_sve_ldffhdu_be_zd_mte,
5542 gen_helper_sve_ldffsdu_be_zd_mte,
5543 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5544};
5545
3a7be554 5546static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5547{
5548 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5549 bool be = s->be_data == MO_BE;
5550 bool mte = s->mte_active[0];
673e9fa6
RH
5551
5552 if (!sve_access_check(s)) {
5553 return true;
5554 }
5555
5556 switch (a->esz) {
5557 case MO_32:
d28d12f0 5558 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5559 break;
5560 case MO_64:
d28d12f0 5561 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5562 break;
5563 }
5564 assert(fn != NULL);
5565
5566 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5567 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5568 return true;
5569}
5570
3a7be554 5571static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5572{
5573 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5574 bool be = s->be_data == MO_BE;
5575 bool mte = s->mte_active[0];
673e9fa6
RH
5576 TCGv_i64 imm;
5577
5578 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5579 return false;
5580 }
5581 if (!sve_access_check(s)) {
5582 return true;
5583 }
5584
5585 switch (a->esz) {
5586 case MO_32:
d28d12f0 5587 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5588 break;
5589 case MO_64:
d28d12f0 5590 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5591 break;
5592 }
5593 assert(fn != NULL);
5594
5595 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5596 * by loading the immediate into the scalar parameter.
5597 */
5598 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5599 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
5600 tcg_temp_free_i64(imm);
5601 return true;
5602}
5603
d28d12f0
RH
5604/*
 * Scatter-store helpers for 32-bit vector elements.
 * Indexed by [mte][be][xs][msz]:
 *   mte: 0 selects the plain helpers, 1 the "_mte" variants.
 *   be:  0 selects the "_le" helpers, 1 the "_be" helpers
 *        (the byte-sized helpers have no endian variant).
 *   xs:  0 selects the "_zsu" helpers, 1 the "_zss" helpers.
 *   msz: 0..2, selecting the byte / half / word helpers.
 */
5605static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5606 { /* MTE Inactive */
5607 { /* Little-endian */
5608 { gen_helper_sve_stbs_zsu,
5609 gen_helper_sve_sths_le_zsu,
5610 gen_helper_sve_stss_le_zsu, },
5611 { gen_helper_sve_stbs_zss,
5612 gen_helper_sve_sths_le_zss,
5613 gen_helper_sve_stss_le_zss, } },
5614 { /* Big-endian */
5615 { gen_helper_sve_stbs_zsu,
5616 gen_helper_sve_sths_be_zsu,
5617 gen_helper_sve_stss_be_zsu, },
5618 { gen_helper_sve_stbs_zss,
5619 gen_helper_sve_sths_be_zss,
5620 gen_helper_sve_stss_be_zss, } } },
5621 { /* MTE Active */
5622 { /* Little-endian */
5623 { gen_helper_sve_stbs_zsu_mte,
5624 gen_helper_sve_sths_le_zsu_mte,
5625 gen_helper_sve_stss_le_zsu_mte, },
5626 { gen_helper_sve_stbs_zss_mte,
5627 gen_helper_sve_sths_le_zss_mte,
5628 gen_helper_sve_stss_le_zss_mte, } },
5629 { /* Big-endian */
5630 { gen_helper_sve_stbs_zsu_mte,
5631 gen_helper_sve_sths_be_zsu_mte,
5632 gen_helper_sve_stss_be_zsu_mte, },
5633 { gen_helper_sve_stbs_zss_mte,
5634 gen_helper_sve_sths_be_zss_mte,
5635 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5636};
5637
5638/*
 * Scatter-store helpers for 64-bit vector elements.
 * Indexed by [mte][be][xs][msz], with msz ranging over 0..3.
 * Note that we overload xs=2 to indicate 64-bit offset: that slot holds
 * the "_zd" helpers, while xs=0 and xs=1 hold "_zsu" and "_zss".
 */
d28d12f0
RH
5639static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5640 { /* MTE Inactive */
5641 { /* Little-endian */
5642 { gen_helper_sve_stbd_zsu,
5643 gen_helper_sve_sthd_le_zsu,
5644 gen_helper_sve_stsd_le_zsu,
5645 gen_helper_sve_stdd_le_zsu, },
5646 { gen_helper_sve_stbd_zss,
5647 gen_helper_sve_sthd_le_zss,
5648 gen_helper_sve_stsd_le_zss,
5649 gen_helper_sve_stdd_le_zss, },
5650 { gen_helper_sve_stbd_zd,
5651 gen_helper_sve_sthd_le_zd,
5652 gen_helper_sve_stsd_le_zd,
5653 gen_helper_sve_stdd_le_zd, } },
5654 { /* Big-endian */
5655 { gen_helper_sve_stbd_zsu,
5656 gen_helper_sve_sthd_be_zsu,
5657 gen_helper_sve_stsd_be_zsu,
5658 gen_helper_sve_stdd_be_zsu, },
5659 { gen_helper_sve_stbd_zss,
5660 gen_helper_sve_sthd_be_zss,
5661 gen_helper_sve_stsd_be_zss,
5662 gen_helper_sve_stdd_be_zss, },
5663 { gen_helper_sve_stbd_zd,
5664 gen_helper_sve_sthd_be_zd,
5665 gen_helper_sve_stsd_be_zd,
5666 gen_helper_sve_stdd_be_zd, } } },
5667 { /* MTE Active */
5668 { /* Little-endian */
5669 { gen_helper_sve_stbd_zsu_mte,
5670 gen_helper_sve_sthd_le_zsu_mte,
5671 gen_helper_sve_stsd_le_zsu_mte,
5672 gen_helper_sve_stdd_le_zsu_mte, },
5673 { gen_helper_sve_stbd_zss_mte,
5674 gen_helper_sve_sthd_le_zss_mte,
5675 gen_helper_sve_stsd_le_zss_mte,
5676 gen_helper_sve_stdd_le_zss_mte, },
5677 { gen_helper_sve_stbd_zd_mte,
5678 gen_helper_sve_sthd_le_zd_mte,
5679 gen_helper_sve_stsd_le_zd_mte,
5680 gen_helper_sve_stdd_le_zd_mte, } },
5681 { /* Big-endian */
5682 { gen_helper_sve_stbd_zsu_mte,
5683 gen_helper_sve_sthd_be_zsu_mte,
5684 gen_helper_sve_stsd_be_zsu_mte,
5685 gen_helper_sve_stdd_be_zsu_mte, },
5686 { gen_helper_sve_stbd_zss_mte,
5687 gen_helper_sve_sthd_be_zss_mte,
5688 gen_helper_sve_stsd_be_zss_mte,
5689 gen_helper_sve_stdd_be_zss_mte, },
5690 { gen_helper_sve_stbd_zd_mte,
5691 gen_helper_sve_sthd_be_zd_mte,
5692 gen_helper_sve_stsd_be_zd_mte,
5693 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5694};
5695
3a7be554 5696static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5697{
f6dbf62a 5698 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5699 bool be = s->be_data == MO_BE;
5700 bool mte = s->mte_active[0];
f6dbf62a
RH
5701
5702 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5703 return false;
5704 }
5705 if (!sve_access_check(s)) {
5706 return true;
5707 }
5708 switch (a->esz) {
5709 case MO_32:
d28d12f0 5710 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5711 break;
5712 case MO_64:
d28d12f0 5713 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5714 break;
5715 default:
5716 g_assert_not_reached();
5717 }
5718 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5719 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5720 return true;
5721}
dec6cf6b 5722
3a7be554 5723static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5724{
5725 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5726 bool be = s->be_data == MO_BE;
5727 bool mte = s->mte_active[0];
408ecde9
RH
5728 TCGv_i64 imm;
5729
5730 if (a->esz < a->msz) {
5731 return false;
5732 }
5733 if (!sve_access_check(s)) {
5734 return true;
5735 }
5736
5737 switch (a->esz) {
5738 case MO_32:
d28d12f0 5739 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5740 break;
5741 case MO_64:
d28d12f0 5742 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5743 break;
5744 }
5745 assert(fn != NULL);
5746
5747 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5748 * by loading the immediate into the scalar parameter.
5749 */
5750 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5751 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
5752 tcg_temp_free_i64(imm);
5753 return true;
5754}
5755
dec6cf6b
RH
5756/*
5757 * Prefetches
5758 */
5759
3a7be554 5760static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5761{
5762 /* Prefetch is a nop within QEMU. */
2f95a3b0 5763 (void)sve_access_check(s);
dec6cf6b
RH
5764 return true;
5765}
5766
3a7be554 5767static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5768{
5769 if (a->rm == 31) {
5770 return false;
5771 }
5772 /* Prefetch is a nop within QEMU. */
2f95a3b0 5773 (void)sve_access_check(s);
dec6cf6b
RH
5774 return true;
5775}
a2103582
RH
5776
5777/*
5778 * Move Prefix
5779 *
5780 * TODO: The implementation so far could handle predicated merging movprfx.
5781 * The helper functions as written take an extra source register to
5782 * use in the operation, but the result is only written when predication
5783 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5784 * to allow the final write back to the destination to be unconditional.
5785 * For predicated zeroing movprfx, we need to rearrange the helpers to
5786 * allow the final write back to zero inactives.
5787 *
5788 * In the meantime, just emit the moves.
5789 */
5790
3a7be554 5791static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
5792{
5793 return do_mov_z(s, a->rd, a->rn);
5794}
5795
3a7be554 5796static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5797{
5798 if (sve_access_check(s)) {
5799 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5800 }
5801 return true;
5802}
5803
3a7be554 5804static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 5805{
60245996 5806 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 5807}