]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Enforce alignment for aa64 vector LDn/STn (single)
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
50f57e09 9 * version 2.1 of the License, or (at your option) any later version.
38388f7e
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
dcb32f1d
PMD
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
9ee3a611
RH
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
38cadeba
RH
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
757f9cff
RH
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
c4e7c493 46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
ccd841c3
RH
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
451e4ffd 57static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
62
451e4ffd 63static int tszimm_shr(DisasContext *s, int x)
ccd841c3 64{
451e4ffd 65 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
66}
67
68/* See e.g. LSL (immediate, predicated). */
451e4ffd 69static int tszimm_shl(DisasContext *s, int x)
ccd841c3 70{
451e4ffd 71 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
72}
73
451e4ffd 74static inline int plus1(DisasContext *s, int x)
24e82e68
RH
75{
76 return x + 1;
77}
78
f25a2361 79/* The SH bit is in bit 8. Extract the low 8 and shift. */
451e4ffd 80static inline int expand_imm_sh8s(DisasContext *s, int x)
f25a2361
RH
81{
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83}
84
451e4ffd 85static inline int expand_imm_sh8u(DisasContext *s, int x)
6e6a157d
RH
86{
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88}
89
c4e7c493
RH
90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
451e4ffd 93static inline int msz_dtype(DisasContext *s, int msz)
c4e7c493
RH
94{
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97}
98
38388f7e
RH
99/*
100 * Include the generated decoder.
101 */
102
139c1837 103#include "decode-sve.c.inc"
38388f7e
RH
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
d1822297
RH
109/* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
122
516e246a
RH
/*
 * Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 *
 * Sizes up to 8 bytes become 8; anything larger is rounded up to a
 * multiple of 16.
 */
static int size_for_gvec(int size)
{
    if (size > 8) {
        return QEMU_ALIGN_UP(size, 16);
    }
    return 8;
}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
144
40e32e5a
RH
145/* Invoke an out-of-line helper on 2 Zregs. */
146static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
148{
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
153}
154
e645d1a1
RH
155/* Invoke an out-of-line helper on 3 Zregs. */
156static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
158{
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
164}
165
96a461f7
RH
166/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
169{
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
175}
176
36cbb7a8
RH
177/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
180{
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
187}
f7d79c41 188
36cbb7a8 189/* Invoke a vector expander on two Zregs. */
f7d79c41
RH
190static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
38388f7e 192{
f7d79c41
RH
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
38388f7e
RH
196}
197
39eea561 198/* Invoke a vector expander on three Zregs. */
28c4da31
RH
199static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
38388f7e 201{
28c4da31
RH
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
38388f7e
RH
206}
207
39eea561
RH
208/* Invoke a vector move on two Zregs. */
209static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 210{
f7d79c41
RH
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
213 }
214 return true;
38388f7e
RH
215}
216
d9d78dcc
RH
217/* Initialize a Zreg with replications of a 64-bit immediate. */
218static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
219{
220 unsigned vsz = vec_full_reg_size(s);
8711e71f 221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
d9d78dcc
RH
222}
223
516e246a 224/* Invoke a vector expander on three Pregs. */
dd81a8d7
RH
225static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
516e246a 227{
dd81a8d7
RH
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
516e246a
RH
232}
233
234/* Invoke a vector move on two Pregs. */
235static bool do_mov_p(DisasContext *s, int rd, int rn)
236{
d0b2df5a
RH
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
241 }
242 return true;
516e246a
RH
243}
244
9e18d7a6
RH
245/* Set the cpu flags as per a return from an SVE helper. */
246static void do_pred_flags(TCGv_i32 t)
247{
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
252}
253
254/* Subroutines computing the ARM PredTest psuedofunction. */
255static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
256{
257 TCGv_i32 t = tcg_temp_new_i32();
258
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
262}
263
264static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
265{
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
269
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
273
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
277
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
280}
281
028e2a7b
RH
/*
 * For each element size, the bits within a predicate word that are active.
 * Indexed by element size 0..3: every bit, every 2nd bit, every 4th bit,
 * every 8th bit.
 */
const uint64_t pred_esz_masks[4] = {
    UINT64_C(0xffffffffffffffff),
    UINT64_C(0x5555555555555555),
    UINT64_C(0x1111111111111111),
    UINT64_C(0x0101010101010101),
};
287
39eea561
RH
288/*
289 *** SVE Logical - Unpredicated Group
290 */
291
28c4da31
RH
292static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
293{
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
296 }
297 return true;
298}
299
3a7be554 300static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 301{
28c4da31 302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
39eea561
RH
303}
304
3a7be554 305static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 306{
28c4da31 307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
39eea561
RH
308}
309
3a7be554 310static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
39eea561 311{
28c4da31 312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
39eea561
RH
313}
314
3a7be554 315static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
38388f7e 316{
28c4da31 317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
38388f7e 318}
d1822297 319
fea98f9c
RH
320/*
321 *** SVE Integer Arithmetic - Unpredicated Group
322 */
323
3a7be554 324static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 325{
28c4da31 326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
fea98f9c
RH
327}
328
3a7be554 329static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 330{
28c4da31 331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
fea98f9c
RH
332}
333
3a7be554 334static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 335{
28c4da31 336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
fea98f9c
RH
337}
338
3a7be554 339static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 340{
28c4da31 341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
fea98f9c
RH
342}
343
3a7be554 344static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 345{
28c4da31 346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
fea98f9c
RH
347}
348
3a7be554 349static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
fea98f9c 350{
28c4da31 351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
fea98f9c
RH
352}
353
f97cfd59
RH
354/*
355 *** SVE Integer Arithmetic - Binary Predicated Group
356 */
357
358static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
359{
f97cfd59
RH
360 if (fn == NULL) {
361 return false;
362 }
363 if (sve_access_check(s)) {
36cbb7a8 364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
f97cfd59
RH
365 }
366 return true;
367}
368
a2103582
RH
369/* Select active elememnts from Zn and inactive elements from Zm,
370 * storing the result in Zd.
371 */
372static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
373{
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
377 };
36cbb7a8 378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
a2103582
RH
379}
380
f97cfd59 381#define DO_ZPZZ(NAME, name) \
3a7be554 382static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
f97cfd59
RH
383{ \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
389}
390
391DO_ZPZZ(AND, and)
392DO_ZPZZ(EOR, eor)
393DO_ZPZZ(ORR, orr)
394DO_ZPZZ(BIC, bic)
395
396DO_ZPZZ(ADD, add)
397DO_ZPZZ(SUB, sub)
398
399DO_ZPZZ(SMAX, smax)
400DO_ZPZZ(UMAX, umax)
401DO_ZPZZ(SMIN, smin)
402DO_ZPZZ(UMIN, umin)
403DO_ZPZZ(SABD, sabd)
404DO_ZPZZ(UABD, uabd)
405
406DO_ZPZZ(MUL, mul)
407DO_ZPZZ(SMULH, smulh)
408DO_ZPZZ(UMULH, umulh)
409
27721dbb
RH
410DO_ZPZZ(ASR, asr)
411DO_ZPZZ(LSR, lsr)
412DO_ZPZZ(LSL, lsl)
413
3a7be554 414static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
415{
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
418 };
419 return do_zpzz_ool(s, a, fns[a->esz]);
420}
421
3a7be554 422static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
f97cfd59
RH
423{
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
426 };
427 return do_zpzz_ool(s, a, fns[a->esz]);
428}
429
3a7be554 430static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
a2103582
RH
431{
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
434 }
435 return true;
436}
d3fe4a29 437
f97cfd59
RH
438#undef DO_ZPZZ
439
afac6d04
RH
440/*
441 *** SVE Integer Arithmetic - Unary Predicated Group
442 */
443
444static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
445{
446 if (fn == NULL) {
447 return false;
448 }
449 if (sve_access_check(s)) {
96a461f7 450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
afac6d04
RH
451 }
452 return true;
453}
454
455#define DO_ZPZ(NAME, name) \
3a7be554 456static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
afac6d04
RH
457{ \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
463}
464
465DO_ZPZ(CLS, cls)
466DO_ZPZ(CLZ, clz)
467DO_ZPZ(CNT_zpz, cnt_zpz)
468DO_ZPZ(CNOT, cnot)
469DO_ZPZ(NOT_zpz, not_zpz)
470DO_ZPZ(ABS, abs)
471DO_ZPZ(NEG, neg)
472
3a7be554 473static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
474{
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
480 };
481 return do_zpz_ool(s, a, fns[a->esz]);
482}
483
3a7be554 484static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
485{
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
491 };
492 return do_zpz_ool(s, a, fns[a->esz]);
493}
494
3a7be554 495static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
496{
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504}
505
3a7be554 506static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
507{
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
513 };
514 return do_zpz_ool(s, a, fns[a->esz]);
515}
516
3a7be554 517static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
518{
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
523 };
524 return do_zpz_ool(s, a, fns[a->esz]);
525}
526
3a7be554 527static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
528{
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
533 };
534 return do_zpz_ool(s, a, fns[a->esz]);
535}
536
3a7be554 537static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
538{
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
540}
541
3a7be554 542static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
afac6d04
RH
543{
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
545}
546
547#undef DO_ZPZ
548
047cec97
RH
549/*
550 *** SVE Integer Reduction Group
551 */
552
553typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
556{
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
561
562 if (fn == NULL) {
563 return false;
564 }
565 if (!sve_access_check(s)) {
566 return true;
567 }
568
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
573
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
580
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
584}
585
586#define DO_VPZ(NAME, name) \
3a7be554 587static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
047cec97
RH
588{ \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
594}
595
596DO_VPZ(ORV, orv)
597DO_VPZ(ANDV, andv)
598DO_VPZ(EORV, eorv)
599
600DO_VPZ(UADDV, uaddv)
601DO_VPZ(SMAXV, smaxv)
602DO_VPZ(UMAXV, umaxv)
603DO_VPZ(SMINV, sminv)
604DO_VPZ(UMINV, uminv)
605
3a7be554 606static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
047cec97
RH
607{
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
611 };
612 return do_vpz_ool(s, a, fns[a->esz]);
613}
614
615#undef DO_VPZ
616
ccd841c3
RH
617/*
618 *** SVE Shift by Immediate - Predicated Group
619 */
620
60245996
RH
621/*
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
ccd841c3 624 */
60245996
RH
625static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
ccd841c3 627{
60245996
RH
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
ccd841c3 631 };
60245996 632
ccd841c3 633 if (sve_access_check(s)) {
96a461f7 634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
ccd841c3
RH
635 }
636 return true;
637}
638
639static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
641{
642 if (sve_access_check(s)) {
96a461f7 643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
ccd841c3
RH
644 }
645 return true;
646}
647
3a7be554 648static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
649{
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
653 };
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
657 }
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
662}
663
3a7be554 664static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
665{
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
669 };
670 if (a->esz < 0) {
671 return false;
672 }
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
60245996 676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
679 }
680}
681
3a7be554 682static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
683{
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
687 };
688 if (a->esz < 0) {
689 return false;
690 }
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
60245996 694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
697 }
698}
699
3a7be554 700static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
ccd841c3
RH
701{
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
705 };
706 if (a->esz < 0) {
707 return false;
708 }
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
60245996 712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
ccd841c3
RH
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
715 }
716}
717
fe7f8dfb
RH
718/*
719 *** SVE Bitwise Shift - Predicated Group
720 */
721
722#define DO_ZPZW(NAME, name) \
3a7be554 723static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
fe7f8dfb
RH
724{ \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
731 } \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
733}
734
735DO_ZPZW(ASR, asr)
736DO_ZPZW(LSR, lsr)
737DO_ZPZW(LSL, lsl)
738
739#undef DO_ZPZW
740
d9d78dcc
RH
741/*
742 *** SVE Bitwise Shift - Unpredicated Group
743 */
744
745static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
748{
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
752 }
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
764 }
765 }
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
768 }
769 return true;
770}
771
3a7be554 772static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
773{
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
775}
776
3a7be554 777static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
778{
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
780}
781
3a7be554 782static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
d9d78dcc
RH
783{
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
785}
786
787static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
788{
789 if (fn == NULL) {
790 return false;
791 }
792 if (sve_access_check(s)) {
e645d1a1 793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
d9d78dcc
RH
794 }
795 return true;
796}
797
798#define DO_ZZW(NAME, name) \
3a7be554 799static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
d9d78dcc
RH
800{ \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
806}
807
808DO_ZZW(ASR, asr)
809DO_ZZW(LSR, lsr)
810DO_ZZW(LSL, lsl)
811
812#undef DO_ZZW
813
96a36e4a
RH
814/*
815 *** SVE Integer Multiply-Add Group
816 */
817
818static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
820{
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
829 }
830 return true;
831}
832
833#define DO_ZPZZZ(NAME, name) \
3a7be554 834static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
96a36e4a
RH
835{ \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
841}
842
843DO_ZPZZZ(MLA, mla)
844DO_ZPZZZ(MLS, mls)
845
846#undef DO_ZPZZZ
847
9a56c9c3
RH
848/*
849 *** SVE Index Generation Group
850 */
851
852static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
854{
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
858
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
868 };
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
871
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
875
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
878 }
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
881}
882
3a7be554 883static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
9a56c9c3
RH
884{
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
891 }
892 return true;
893}
894
3a7be554 895static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
9a56c9c3
RH
896{
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
902 }
903 return true;
904}
905
3a7be554 906static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
9a56c9c3
RH
907{
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
913 }
914 return true;
915}
916
3a7be554 917static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
9a56c9c3
RH
918{
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
923 }
924 return true;
925}
926
96f922cc
RH
927/*
928 *** SVE Stack Allocation Group
929 */
930
3a7be554 931static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
96f922cc 932{
5de56742
AC
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
937 }
96f922cc
RH
938 return true;
939}
940
3a7be554 941static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
96f922cc 942{
5de56742
AC
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
947 }
96f922cc
RH
948 return true;
949}
950
3a7be554 951static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
96f922cc 952{
5de56742
AC
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
956 }
96f922cc
RH
957 return true;
958}
959
4b242d9c
RH
960/*
961 *** SVE Compute Vector Address Group
962 */
963
964static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
965{
966 if (sve_access_check(s)) {
e645d1a1 967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
4b242d9c
RH
968 }
969 return true;
970}
971
3a7be554 972static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
973{
974 return do_adr(s, a, gen_helper_sve_adr_p32);
975}
976
3a7be554 977static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
4b242d9c
RH
978{
979 return do_adr(s, a, gen_helper_sve_adr_p64);
980}
981
3a7be554 982static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
983{
984 return do_adr(s, a, gen_helper_sve_adr_s32);
985}
986
3a7be554 987static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
4b242d9c
RH
988{
989 return do_adr(s, a, gen_helper_sve_adr_u32);
990}
991
0762cd42
RH
992/*
993 *** SVE Integer Misc - Unpredicated Group
994 */
995
3a7be554 996static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
0762cd42
RH
997{
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1003 };
1004 if (a->esz == 0) {
1005 return false;
1006 }
1007 if (sve_access_check(s)) {
40e32e5a 1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
0762cd42
RH
1009 }
1010 return true;
1011}
1012
3a7be554 1013static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
a1f233f2
RH
1014{
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1020 };
1021 if (a->esz == 0) {
1022 return false;
1023 }
1024 if (sve_access_check(s)) {
e645d1a1 1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
a1f233f2
RH
1026 }
1027 return true;
1028}
1029
516e246a
RH
1030/*
1031 *** SVE Predicate Logical Operations Group
1032 */
1033
1034static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1036{
1037 if (!sve_access_check(s)) {
1038 return true;
1039 }
1040
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1046
dd81a8d7
RH
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1050 }
1051
516e246a
RH
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1058
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1062
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1065
1066 do_predtest1(pd, pg);
1067
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1077 */
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1082 }
1083
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1086 }
1087 return true;
1088}
1089
1090static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1091{
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1094}
1095
1096static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1098{
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1101}
1102
3a7be554 1103static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1104{
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1110 };
dd81a8d7
RH
1111
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1115 }
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1121 }
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
516e246a 1126 }
516e246a 1127 }
dd81a8d7 1128 return do_pppp_flags(s, a, &op);
516e246a
RH
1129}
1130
1131static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1132{
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1135}
1136
1137static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1139{
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1142}
1143
3a7be554 1144static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1145{
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1151 };
dd81a8d7
RH
1152
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1156 }
1157 return true;
516e246a 1158 }
dd81a8d7 1159 return do_pppp_flags(s, a, &op);
516e246a
RH
1160}
1161
1162static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1163{
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1166}
1167
1168static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1170{
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1173}
1174
3a7be554 1175static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1176{
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1182 };
dd81a8d7 1183 return do_pppp_flags(s, a, &op);
516e246a
RH
1184}
1185
3a7be554 1186static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
516e246a 1187{
516e246a
RH
1188 if (a->s) {
1189 return false;
516e246a 1190 }
d4bc6232
RH
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1197 }
1198 return true;
516e246a
RH
1199}
1200
1201static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1202{
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1205}
1206
1207static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1209{
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1212}
1213
3a7be554 1214static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1215{
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1221 };
dd81a8d7
RH
1222
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
516e246a 1224 return do_mov_p(s, a->rd, a->rn);
516e246a 1225 }
dd81a8d7 1226 return do_pppp_flags(s, a, &op);
516e246a
RH
1227}
1228
1229static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1230{
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1233}
1234
1235static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1237{
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1240}
1241
3a7be554 1242static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1243{
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1249 };
dd81a8d7 1250 return do_pppp_flags(s, a, &op);
516e246a
RH
1251}
1252
1253static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1254{
1255 tcg_gen_or_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1257}
1258
1259static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1261{
1262 tcg_gen_or_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1264}
1265
3a7be554 1266static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1267{
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nor_pg_i64,
1270 .fniv = gen_nor_pg_vec,
1271 .fno = gen_helper_sve_nor_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1273 };
dd81a8d7 1274 return do_pppp_flags(s, a, &op);
516e246a
RH
1275}
1276
1277static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1278{
1279 tcg_gen_and_i64(pd, pn, pm);
1280 tcg_gen_andc_i64(pd, pg, pd);
1281}
1282
1283static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1284 TCGv_vec pm, TCGv_vec pg)
1285{
1286 tcg_gen_and_vec(vece, pd, pn, pm);
1287 tcg_gen_andc_vec(vece, pd, pg, pd);
1288}
1289
3a7be554 1290static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
516e246a
RH
1291{
1292 static const GVecGen4 op = {
1293 .fni8 = gen_nand_pg_i64,
1294 .fniv = gen_nand_pg_vec,
1295 .fno = gen_helper_sve_nand_pppp,
1296 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1297 };
dd81a8d7 1298 return do_pppp_flags(s, a, &op);
516e246a
RH
1299}
1300
9e18d7a6
RH
1301/*
1302 *** SVE Predicate Misc Group
1303 */
1304
3a7be554 1305static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
9e18d7a6
RH
1306{
1307 if (sve_access_check(s)) {
1308 int nofs = pred_full_reg_offset(s, a->rn);
1309 int gofs = pred_full_reg_offset(s, a->pg);
1310 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1311
1312 if (words == 1) {
1313 TCGv_i64 pn = tcg_temp_new_i64();
1314 TCGv_i64 pg = tcg_temp_new_i64();
1315
1316 tcg_gen_ld_i64(pn, cpu_env, nofs);
1317 tcg_gen_ld_i64(pg, cpu_env, gofs);
1318 do_predtest1(pn, pg);
1319
1320 tcg_temp_free_i64(pn);
1321 tcg_temp_free_i64(pg);
1322 } else {
1323 do_predtest(s, nofs, gofs, words);
1324 }
1325 }
1326 return true;
1327}
1328
028e2a7b
RH
/*
 * Number of elements selected by a predicate-count pattern; see the
 * ARM pseudocode DecodePredCount.
 *
 * @fullsz:  size that, shifted right by @esz, gives the element count
 * @pattern: pattern field of the instruction
 * @esz:     log2 of the element size
 *
 * Fixed-count patterns (VL1..VL256) yield that count when at least that
 * many elements exist, and 0 otherwise.  Unrecognised patterns yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    const unsigned nelem = fullsz >> esz;
    unsigned limit;

    if (pattern == 0x0) {                           /* POW2 */
        return pow2floor(nelem);
    } else if (pattern >= 0x1 && pattern <= 0x8) {  /* VL1 .. VL8 */
        limit = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {  /* VL16 .. VL256 */
        limit = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {                   /* MUL4 */
        return nelem - nelem % 4;
    } else if (pattern == 0x1e) {                   /* MUL3 */
        return nelem - nelem % 3;
    } else if (pattern == 0x1f) {                   /* ALL */
        return nelem;
    } else {                                        /* #uimm5 */
        return 0;
    }

    if (nelem < limit) {
        return 0;
    }
    return limit;
}
1366
1367/* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1371 */
1372static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1373{
1374 if (!sve_access_check(s)) {
1375 return true;
1376 }
1377
1378 unsigned fullsz = vec_full_reg_size(s);
1379 unsigned ofs = pred_full_reg_offset(s, rd);
1380 unsigned numelem, setsz, i;
1381 uint64_t word, lastword;
1382 TCGv_i64 t;
1383
1384 numelem = decode_pred_count(fullsz, pat, esz);
1385
1386 /* Determine what we must store into each bit, and how many. */
1387 if (numelem == 0) {
1388 lastword = word = 0;
1389 setsz = fullsz;
1390 } else {
1391 setsz = numelem << esz;
1392 lastword = word = pred_esz_masks[esz];
1393 if (setsz % 64) {
973558a3 1394 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1395 }
1396 }
1397
1398 t = tcg_temp_new_i64();
1399 if (fullsz <= 64) {
1400 tcg_gen_movi_i64(t, lastword);
1401 tcg_gen_st_i64(t, cpu_env, ofs);
1402 goto done;
1403 }
1404
1405 if (word == lastword) {
1406 unsigned maxsz = size_for_gvec(fullsz / 8);
1407 unsigned oprsz = size_for_gvec(setsz / 8);
1408
1409 if (oprsz * 8 == setsz) {
8711e71f 1410 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
028e2a7b
RH
1411 goto done;
1412 }
028e2a7b
RH
1413 }
1414
1415 setsz /= 8;
1416 fullsz /= 8;
1417
1418 tcg_gen_movi_i64(t, word);
973558a3 1419 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1420 tcg_gen_st_i64(t, cpu_env, ofs + i);
1421 }
1422 if (lastword != word) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs + i);
1425 i += 8;
1426 }
1427 if (i < fullsz) {
1428 tcg_gen_movi_i64(t, 0);
1429 for (; i < fullsz; i += 8) {
1430 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 }
1432 }
1433
1434 done:
1435 tcg_temp_free_i64(t);
1436
1437 /* PTRUES */
1438 if (setflag) {
1439 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1440 tcg_gen_movi_i32(cpu_CF, word == 0);
1441 tcg_gen_movi_i32(cpu_VF, 0);
1442 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1443 }
1444 return true;
1445}
1446
3a7be554 1447static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
028e2a7b
RH
1448{
1449 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1450}
1451
3a7be554 1452static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
028e2a7b
RH
1453{
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1456}
1457
3a7be554 1458static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
028e2a7b
RH
1459{
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s, 0, a->rd, 32, false);
1462}
1463
3a7be554 1464static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
028e2a7b
RH
1465{
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1468 */
1469 arg_rprr_s alt_a = {
1470 .rd = a->rd, .pg = a->pg, .s = a->s,
1471 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1472 };
3a7be554 1473 return trans_AND_pppp(s, &alt_a);
028e2a7b
RH
1474}
1475
3a7be554 1476static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
028e2a7b
RH
1477{
1478 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1479}
1480
3a7be554 1481static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
028e2a7b
RH
1482{
1483 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1484}
1485
1486static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1487 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1488 TCGv_ptr, TCGv_i32))
1489{
1490 if (!sve_access_check(s)) {
1491 return true;
1492 }
1493
1494 TCGv_ptr t_pd = tcg_temp_new_ptr();
1495 TCGv_ptr t_pg = tcg_temp_new_ptr();
1496 TCGv_i32 t;
86300b5d 1497 unsigned desc = 0;
028e2a7b 1498
86300b5d
RH
1499 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1500 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
028e2a7b
RH
1501
1502 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1504 t = tcg_const_i32(desc);
1505
1506 gen_fn(t, t_pd, t_pg, t);
1507 tcg_temp_free_ptr(t_pd);
1508 tcg_temp_free_ptr(t_pg);
1509
1510 do_pred_flags(t);
1511 tcg_temp_free_i32(t);
1512 return true;
1513}
1514
3a7be554 1515static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1516{
1517 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1518}
1519
3a7be554 1520static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
028e2a7b
RH
1521{
1522 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523}
1524
24e82e68
RH
1525/*
1526 *** SVE Element Count Group
1527 */
1528
1529/* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisions we must perform to bound the result.
1532 */
1533static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1534{
1535 int64_t ibound;
1536 TCGv_i64 bound;
1537 TCGCond cond;
1538
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1540 if (u) {
1541 tcg_gen_ext32u_i64(reg, reg);
1542 } else {
1543 tcg_gen_ext32s_i64(reg, reg);
1544 }
1545 if (d) {
1546 tcg_gen_sub_i64(reg, reg, val);
1547 ibound = (u ? 0 : INT32_MIN);
1548 cond = TCG_COND_LT;
1549 } else {
1550 tcg_gen_add_i64(reg, reg, val);
1551 ibound = (u ? UINT32_MAX : INT32_MAX);
1552 cond = TCG_COND_GT;
1553 }
1554 bound = tcg_const_i64(ibound);
1555 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1556 tcg_temp_free_i64(bound);
1557}
1558
1559/* Similarly with 64-bit values. */
1560static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1561{
1562 TCGv_i64 t0 = tcg_temp_new_i64();
1563 TCGv_i64 t1 = tcg_temp_new_i64();
1564 TCGv_i64 t2;
1565
1566 if (u) {
1567 if (d) {
1568 tcg_gen_sub_i64(t0, reg, val);
1569 tcg_gen_movi_i64(t1, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1571 } else {
1572 tcg_gen_add_i64(t0, reg, val);
1573 tcg_gen_movi_i64(t1, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1575 }
1576 } else {
1577 if (d) {
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0, reg, val);
1580 tcg_gen_sub_i64(t1, reg, val);
7a31e0c6 1581 tcg_gen_xor_i64(reg, reg, t1);
24e82e68
RH
1582 tcg_gen_and_i64(t0, t0, reg);
1583
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg, INT64_MIN);
1586 t2 = tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1588 } else {
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0, reg, val);
1591 tcg_gen_add_i64(reg, reg, val);
1592 tcg_gen_xor_i64(t1, reg, val);
1593 tcg_gen_andc_i64(t0, t1, t0);
1594
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1, INT64_MAX);
1597 t2 = tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1599 }
1600 tcg_temp_free_i64(t2);
1601 }
1602 tcg_temp_free_i64(t0);
1603 tcg_temp_free_i64(t1);
1604}
1605
1606/* Similarly with a vector and a scalar operand. */
1607static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1608 TCGv_i64 val, bool u, bool d)
1609{
1610 unsigned vsz = vec_full_reg_size(s);
1611 TCGv_ptr dptr, nptr;
1612 TCGv_i32 t32, desc;
1613 TCGv_i64 t64;
1614
1615 dptr = tcg_temp_new_ptr();
1616 nptr = tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1618 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1619 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1620
1621 switch (esz) {
1622 case MO_8:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1627 }
1628 if (u) {
1629 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1632 }
1633 tcg_temp_free_i32(t32);
1634 break;
1635
1636 case MO_16:
1637 t32 = tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32, val);
1639 if (d) {
1640 tcg_gen_neg_i32(t32, t32);
1641 }
1642 if (u) {
1643 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1644 } else {
1645 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1646 }
1647 tcg_temp_free_i32(t32);
1648 break;
1649
1650 case MO_32:
1651 t64 = tcg_temp_new_i64();
1652 if (d) {
1653 tcg_gen_neg_i64(t64, val);
1654 } else {
1655 tcg_gen_mov_i64(t64, val);
1656 }
1657 if (u) {
1658 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1661 }
1662 tcg_temp_free_i64(t64);
1663 break;
1664
1665 case MO_64:
1666 if (u) {
1667 if (d) {
1668 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1669 } else {
1670 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1671 }
1672 } else if (d) {
1673 t64 = tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64, val);
1675 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1676 tcg_temp_free_i64(t64);
1677 } else {
1678 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1679 }
1680 break;
1681
1682 default:
1683 g_assert_not_reached();
1684 }
1685
1686 tcg_temp_free_ptr(dptr);
1687 tcg_temp_free_ptr(nptr);
1688 tcg_temp_free_i32(desc);
1689}
1690
3a7be554 1691static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1692{
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1697 }
1698 return true;
1699}
1700
3a7be554 1701static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1702{
1703 if (sve_access_check(s)) {
1704 unsigned fullsz = vec_full_reg_size(s);
1705 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1706 int inc = numelem * a->imm * (a->d ? -1 : 1);
1707 TCGv_i64 reg = cpu_reg(s, a->rd);
1708
1709 tcg_gen_addi_i64(reg, reg, inc);
1710 }
1711 return true;
1712}
1713
3a7be554 1714static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1715{
1716 if (!sve_access_check(s)) {
1717 return true;
1718 }
1719
1720 unsigned fullsz = vec_full_reg_size(s);
1721 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1722 int inc = numelem * a->imm;
1723 TCGv_i64 reg = cpu_reg(s, a->rd);
1724
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1726 if (inc == 0) {
1727 if (a->u) {
1728 tcg_gen_ext32u_i64(reg, reg);
1729 } else {
1730 tcg_gen_ext32s_i64(reg, reg);
1731 }
1732 } else {
1733 TCGv_i64 t = tcg_const_i64(inc);
1734 do_sat_addsub_32(reg, t, a->u, a->d);
1735 tcg_temp_free_i64(t);
1736 }
1737 return true;
1738}
1739
3a7be554 1740static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1741{
1742 if (!sve_access_check(s)) {
1743 return true;
1744 }
1745
1746 unsigned fullsz = vec_full_reg_size(s);
1747 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1748 int inc = numelem * a->imm;
1749 TCGv_i64 reg = cpu_reg(s, a->rd);
1750
1751 if (inc != 0) {
1752 TCGv_i64 t = tcg_const_i64(inc);
1753 do_sat_addsub_64(reg, t, a->u, a->d);
1754 tcg_temp_free_i64(t);
1755 }
1756 return true;
1757}
1758
3a7be554 1759static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1760{
1761 if (a->esz == 0) {
1762 return false;
1763 }
1764
1765 unsigned fullsz = vec_full_reg_size(s);
1766 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1767 int inc = numelem * a->imm;
1768
1769 if (inc != 0) {
1770 if (sve_access_check(s)) {
1771 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1772 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1773 vec_full_reg_offset(s, a->rn),
1774 t, fullsz, fullsz);
1775 tcg_temp_free_i64(t);
1776 }
1777 } else {
1778 do_mov_z(s, a->rd, a->rn);
1779 }
1780 return true;
1781}
1782
3a7be554 1783static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1784{
1785 if (a->esz == 0) {
1786 return false;
1787 }
1788
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1792
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1798 }
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1801 }
1802 return true;
1803}
1804
e1fa1164
RH
1805/*
1806 *** SVE Bitwise Immediate Group
1807 */
1808
1809static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1810{
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1816 }
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1821 }
1822 return true;
1823}
1824
3a7be554 1825static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1826{
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1828}
1829
3a7be554 1830static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1831{
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1833}
1834
3a7be554 1835static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1836{
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1838}
1839
3a7be554 1840static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
1841{
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1847 }
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1850 }
1851 return true;
1852}
1853
f25a2361
RH
1854/*
1855 *** SVE Integer Wide Immediate - Predicated Group
1856 */
1857
1858/* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1860 */
1861static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1863{
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1868 };
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1874
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1878
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1880
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1885}
1886
3a7be554 1887static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
1888{
1889 if (a->esz == 0) {
1890 return false;
1891 }
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1898 }
1899 return true;
1900}
1901
3a7be554 1902static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 1903{
3a7be554 1904 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1905 return false;
1906 }
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1911 }
1912 return true;
1913}
1914
3a7be554 1915static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
1916{
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1920 };
1921
3a7be554 1922 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1923 return false;
1924 }
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1932 }
1933 return true;
1934}
1935
b94f8f60
RH
1936/*
1937 *** SVE Permute Extract Group
1938 */
1939
3a7be554 1940static bool trans_EXT(DisasContext *s, arg_EXT *a)
b94f8f60
RH
1941{
1942 if (!sve_access_check(s)) {
1943 return true;
1944 }
1945
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1952
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1955 */
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1963 }
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1966 }
1967 return true;
1968}
1969
30562ab7
RH
1970/*
1971 *** SVE Permute - Unpredicated Group
1972 */
1973
3a7be554 1974static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
1975{
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1980 }
1981 return true;
1982}
1983
3a7be554 1984static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
1985{
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1988 }
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1993
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1996
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
7e17d50e
RH
2001 /*
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2004 */
2005 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2006 }
2007 }
2008 return true;
2009}
2010
2011static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2012{
2013 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2014 static gen_insr * const fns[4] = {
2015 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2016 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2017 };
2018 unsigned vsz = vec_full_reg_size(s);
2019 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2020 TCGv_ptr t_zd = tcg_temp_new_ptr();
2021 TCGv_ptr t_zn = tcg_temp_new_ptr();
2022
2023 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2024 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2025
2026 fns[a->esz](t_zd, t_zn, val, desc);
2027
2028 tcg_temp_free_ptr(t_zd);
2029 tcg_temp_free_ptr(t_zn);
2030 tcg_temp_free_i32(desc);
2031}
2032
3a7be554 2033static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2034{
2035 if (sve_access_check(s)) {
2036 TCGv_i64 t = tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2038 do_insr_i64(s, a, t);
2039 tcg_temp_free_i64(t);
2040 }
2041 return true;
2042}
2043
3a7be554 2044static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2045{
2046 if (sve_access_check(s)) {
2047 do_insr_i64(s, a, cpu_reg(s, a->rm));
2048 }
2049 return true;
2050}
2051
3a7be554 2052static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2053{
2054 static gen_helper_gvec_2 * const fns[4] = {
2055 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2056 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2057 };
2058
2059 if (sve_access_check(s)) {
40e32e5a 2060 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
30562ab7
RH
2061 }
2062 return true;
2063}
2064
3a7be554 2065static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2066{
2067 static gen_helper_gvec_3 * const fns[4] = {
2068 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2069 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2070 };
2071
2072 if (sve_access_check(s)) {
e645d1a1 2073 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2074 }
2075 return true;
2076}
2077
3a7be554 2078static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2079{
2080 static gen_helper_gvec_2 * const fns[4][2] = {
2081 { NULL, NULL },
2082 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2083 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2084 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2085 };
2086
2087 if (a->esz == 0) {
2088 return false;
2089 }
2090 if (sve_access_check(s)) {
2091 unsigned vsz = vec_full_reg_size(s);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2093 vec_full_reg_offset(s, a->rn)
2094 + (a->h ? vsz / 2 : 0),
2095 vsz, vsz, 0, fns[a->esz][a->u]);
2096 }
2097 return true;
2098}
2099
d731d8cb
RH
2100/*
2101 *** SVE Permute - Predicates Group
2102 */
2103
2104static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2105 gen_helper_gvec_3 *fn)
2106{
2107 if (!sve_access_check(s)) {
2108 return true;
2109 }
2110
2111 unsigned vsz = pred_full_reg_size(s);
2112
d731d8cb
RH
2113 TCGv_ptr t_d = tcg_temp_new_ptr();
2114 TCGv_ptr t_n = tcg_temp_new_ptr();
2115 TCGv_ptr t_m = tcg_temp_new_ptr();
2116 TCGv_i32 t_desc;
f9b0fcce 2117 uint32_t desc = 0;
d731d8cb 2118
f9b0fcce
RH
2119 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2120 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2121 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2122
2123 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2124 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2125 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2126 t_desc = tcg_const_i32(desc);
2127
2128 fn(t_d, t_n, t_m, t_desc);
2129
2130 tcg_temp_free_ptr(t_d);
2131 tcg_temp_free_ptr(t_n);
2132 tcg_temp_free_ptr(t_m);
2133 tcg_temp_free_i32(t_desc);
2134 return true;
2135}
2136
2137static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2138 gen_helper_gvec_2 *fn)
2139{
2140 if (!sve_access_check(s)) {
2141 return true;
2142 }
2143
2144 unsigned vsz = pred_full_reg_size(s);
2145 TCGv_ptr t_d = tcg_temp_new_ptr();
2146 TCGv_ptr t_n = tcg_temp_new_ptr();
2147 TCGv_i32 t_desc;
70acaafe 2148 uint32_t desc = 0;
d731d8cb
RH
2149
2150 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2151 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2152
70acaafe
RH
2153 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2154 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2155 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
d731d8cb
RH
2156 t_desc = tcg_const_i32(desc);
2157
2158 fn(t_d, t_n, t_desc);
2159
2160 tcg_temp_free_i32(t_desc);
2161 tcg_temp_free_ptr(t_d);
2162 tcg_temp_free_ptr(t_n);
2163 return true;
2164}
2165
3a7be554 2166static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2167{
2168 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2169}
2170
3a7be554 2171static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2172{
2173 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2174}
2175
3a7be554 2176static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2177{
2178 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2179}
2180
3a7be554 2181static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2182{
2183 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2184}
2185
3a7be554 2186static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2187{
2188 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2189}
2190
3a7be554 2191static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2192{
2193 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2194}
2195
3a7be554 2196static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2197{
2198 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2199}
2200
3a7be554 2201static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2202{
2203 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2204}
2205
3a7be554 2206static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2207{
2208 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2209}
2210
234b48e9
RH
2211/*
2212 *** SVE Permute - Interleaving Group
2213 */
2214
2215static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2216{
2217 static gen_helper_gvec_3 * const fns[4] = {
2218 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2219 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2220 };
2221
2222 if (sve_access_check(s)) {
2223 unsigned vsz = vec_full_reg_size(s);
2224 unsigned high_ofs = high ? vsz / 2 : 0;
2225 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2226 vec_full_reg_offset(s, a->rn) + high_ofs,
2227 vec_full_reg_offset(s, a->rm) + high_ofs,
2228 vsz, vsz, 0, fns[a->esz]);
2229 }
2230 return true;
2231}
2232
2233static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2234 gen_helper_gvec_3 *fn)
2235{
2236 if (sve_access_check(s)) {
e645d1a1 2237 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2238 }
2239 return true;
2240}
2241
3a7be554 2242static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2243{
2244 return do_zip(s, a, false);
2245}
2246
3a7be554 2247static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2248{
2249 return do_zip(s, a, true);
2250}
2251
2252static gen_helper_gvec_3 * const uzp_fns[4] = {
2253 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2254 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2255};
2256
3a7be554 2257static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2258{
2259 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2260}
2261
3a7be554 2262static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2263{
2264 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2265}
2266
2267static gen_helper_gvec_3 * const trn_fns[4] = {
2268 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2269 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2270};
2271
3a7be554 2272static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2273{
2274 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2275}
2276
3a7be554 2277static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2278{
2279 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2280}
2281
3ca879ae
RH
2282/*
2283 *** SVE Permute Vector - Predicated Group
2284 */
2285
3a7be554 2286static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2287{
2288 static gen_helper_gvec_3 * const fns[4] = {
2289 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2290 };
2291 return do_zpz_ool(s, a, fns[a->esz]);
2292}
2293
ef23cb72
RH
2294/* Call the helper that computes the ARM LastActiveElement pseudocode
2295 * function, scaled by the element size. This includes the not found
2296 * indication; e.g. not found for esz=3 is -8.
2297 */
2298static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2299{
2300 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2301 * round up, as we do elsewhere, because we need the exact size.
2302 */
2303 TCGv_ptr t_p = tcg_temp_new_ptr();
2304 TCGv_i32 t_desc;
2acbfbe4 2305 unsigned desc = 0;
ef23cb72 2306
2acbfbe4
RH
2307 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2308 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
ef23cb72
RH
2309
2310 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2311 t_desc = tcg_const_i32(desc);
2312
2313 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2314
2315 tcg_temp_free_i32(t_desc);
2316 tcg_temp_free_ptr(t_p);
2317}
2318
2319/* Increment LAST to the offset of the next element in the vector,
2320 * wrapping around to 0.
2321 */
2322static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2323{
2324 unsigned vsz = vec_full_reg_size(s);
2325
2326 tcg_gen_addi_i32(last, last, 1 << esz);
2327 if (is_power_of_2(vsz)) {
2328 tcg_gen_andi_i32(last, last, vsz - 1);
2329 } else {
2330 TCGv_i32 max = tcg_const_i32(vsz);
2331 TCGv_i32 zero = tcg_const_i32(0);
2332 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2333 tcg_temp_free_i32(max);
2334 tcg_temp_free_i32(zero);
2335 }
2336}
2337
2338/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2339static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2340{
2341 unsigned vsz = vec_full_reg_size(s);
2342
2343 if (is_power_of_2(vsz)) {
2344 tcg_gen_andi_i32(last, last, vsz - 1);
2345 } else {
2346 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2347 TCGv_i32 zero = tcg_const_i32(0);
2348 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2349 tcg_temp_free_i32(max);
2350 tcg_temp_free_i32(zero);
2351 }
2352}
2353
2354/* Load an unsigned element of ESZ from BASE+OFS. */
2355static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2356{
2357 TCGv_i64 r = tcg_temp_new_i64();
2358
2359 switch (esz) {
2360 case 0:
2361 tcg_gen_ld8u_i64(r, base, ofs);
2362 break;
2363 case 1:
2364 tcg_gen_ld16u_i64(r, base, ofs);
2365 break;
2366 case 2:
2367 tcg_gen_ld32u_i64(r, base, ofs);
2368 break;
2369 case 3:
2370 tcg_gen_ld_i64(r, base, ofs);
2371 break;
2372 default:
2373 g_assert_not_reached();
2374 }
2375 return r;
2376}
2377
2378/* Load an unsigned element of ESZ from RM[LAST]. */
2379static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2380 int rm, int esz)
2381{
2382 TCGv_ptr p = tcg_temp_new_ptr();
2383 TCGv_i64 r;
2384
2385 /* Convert offset into vector into offset into ENV.
2386 * The final adjustment for the vector register base
2387 * is added via constant offset to the load.
2388 */
2389#ifdef HOST_WORDS_BIGENDIAN
2390 /* Adjust for element ordering. See vec_reg_offset. */
2391 if (esz < 3) {
2392 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2393 }
2394#endif
2395 tcg_gen_ext_i32_ptr(p, last);
2396 tcg_gen_add_ptr(p, p, cpu_env);
2397
2398 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2399 tcg_temp_free_ptr(p);
2400
2401 return r;
2402}
2403
2404/* Compute CLAST for a Zreg. */
2405static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2406{
2407 TCGv_i32 last;
2408 TCGLabel *over;
2409 TCGv_i64 ele;
2410 unsigned vsz, esz = a->esz;
2411
2412 if (!sve_access_check(s)) {
2413 return true;
2414 }
2415
2416 last = tcg_temp_local_new_i32();
2417 over = gen_new_label();
2418
2419 find_last_active(s, last, esz, a->pg);
2420
2421 /* There is of course no movcond for a 2048-bit vector,
2422 * so we must branch over the actual store.
2423 */
2424 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2425
2426 if (!before) {
2427 incr_last_active(s, last, esz);
2428 }
2429
2430 ele = load_last_active(s, last, a->rm, esz);
2431 tcg_temp_free_i32(last);
2432
2433 vsz = vec_full_reg_size(s);
2434 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2435 tcg_temp_free_i64(ele);
2436
2437 /* If this insn used MOVPRFX, we may need a second move. */
2438 if (a->rd != a->rn) {
2439 TCGLabel *done = gen_new_label();
2440 tcg_gen_br(done);
2441
2442 gen_set_label(over);
2443 do_mov_z(s, a->rd, a->rn);
2444
2445 gen_set_label(done);
2446 } else {
2447 gen_set_label(over);
2448 }
2449 return true;
2450}
2451
3a7be554 2452static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2453{
2454 return do_clast_vector(s, a, false);
2455}
2456
3a7be554 2457static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2458{
2459 return do_clast_vector(s, a, true);
2460}
2461
2462/* Compute CLAST for a scalar. */
2463static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2464 bool before, TCGv_i64 reg_val)
2465{
2466 TCGv_i32 last = tcg_temp_new_i32();
2467 TCGv_i64 ele, cmp, zero;
2468
2469 find_last_active(s, last, esz, pg);
2470
2471 /* Extend the original value of last prior to incrementing. */
2472 cmp = tcg_temp_new_i64();
2473 tcg_gen_ext_i32_i64(cmp, last);
2474
2475 if (!before) {
2476 incr_last_active(s, last, esz);
2477 }
2478
2479 /* The conceit here is that while last < 0 indicates not found, after
2480 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2481 * from which we can load garbage. We then discard the garbage with
2482 * a conditional move.
2483 */
2484 ele = load_last_active(s, last, rm, esz);
2485 tcg_temp_free_i32(last);
2486
2487 zero = tcg_const_i64(0);
2488 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2489
2490 tcg_temp_free_i64(zero);
2491 tcg_temp_free_i64(cmp);
2492 tcg_temp_free_i64(ele);
2493}
2494
2495/* Compute CLAST for a Vreg. */
2496static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2497{
2498 if (sve_access_check(s)) {
2499 int esz = a->esz;
2500 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2501 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2502
2503 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2504 write_fp_dreg(s, a->rd, reg);
2505 tcg_temp_free_i64(reg);
2506 }
2507 return true;
2508}
2509
3a7be554 2510static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2511{
2512 return do_clast_fp(s, a, false);
2513}
2514
3a7be554 2515static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2516{
2517 return do_clast_fp(s, a, true);
2518}
2519
2520/* Compute CLAST for a Xreg. */
2521static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2522{
2523 TCGv_i64 reg;
2524
2525 if (!sve_access_check(s)) {
2526 return true;
2527 }
2528
2529 reg = cpu_reg(s, a->rd);
2530 switch (a->esz) {
2531 case 0:
2532 tcg_gen_ext8u_i64(reg, reg);
2533 break;
2534 case 1:
2535 tcg_gen_ext16u_i64(reg, reg);
2536 break;
2537 case 2:
2538 tcg_gen_ext32u_i64(reg, reg);
2539 break;
2540 case 3:
2541 break;
2542 default:
2543 g_assert_not_reached();
2544 }
2545
2546 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2547 return true;
2548}
2549
3a7be554 2550static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2551{
2552 return do_clast_general(s, a, false);
2553}
2554
3a7be554 2555static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2556{
2557 return do_clast_general(s, a, true);
2558}
2559
2560/* Compute LAST for a scalar. */
2561static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2562 int pg, int rm, bool before)
2563{
2564 TCGv_i32 last = tcg_temp_new_i32();
2565 TCGv_i64 ret;
2566
2567 find_last_active(s, last, esz, pg);
2568 if (before) {
2569 wrap_last_active(s, last, esz);
2570 } else {
2571 incr_last_active(s, last, esz);
2572 }
2573
2574 ret = load_last_active(s, last, rm, esz);
2575 tcg_temp_free_i32(last);
2576 return ret;
2577}
2578
2579/* Compute LAST for a Vreg. */
2580static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2581{
2582 if (sve_access_check(s)) {
2583 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2584 write_fp_dreg(s, a->rd, val);
2585 tcg_temp_free_i64(val);
2586 }
2587 return true;
2588}
2589
3a7be554 2590static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2591{
2592 return do_last_fp(s, a, false);
2593}
2594
3a7be554 2595static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2596{
2597 return do_last_fp(s, a, true);
2598}
2599
2600/* Compute LAST for a Xreg. */
2601static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2602{
2603 if (sve_access_check(s)) {
2604 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2605 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2606 tcg_temp_free_i64(val);
2607 }
2608 return true;
2609}
2610
3a7be554 2611static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2612{
2613 return do_last_general(s, a, false);
2614}
2615
3a7be554 2616static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2617{
2618 return do_last_general(s, a, true);
2619}
2620
3a7be554 2621static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2622{
2623 if (sve_access_check(s)) {
2624 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2625 }
2626 return true;
2627}
2628
3a7be554 2629static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2630{
2631 if (sve_access_check(s)) {
2632 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2633 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2634 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2635 tcg_temp_free_i64(t);
2636 }
2637 return true;
2638}
2639
3a7be554 2640static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2641{
2642 static gen_helper_gvec_3 * const fns[4] = {
2643 NULL,
2644 gen_helper_sve_revb_h,
2645 gen_helper_sve_revb_s,
2646 gen_helper_sve_revb_d,
2647 };
2648 return do_zpz_ool(s, a, fns[a->esz]);
2649}
2650
3a7be554 2651static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2652{
2653 static gen_helper_gvec_3 * const fns[4] = {
2654 NULL,
2655 NULL,
2656 gen_helper_sve_revh_s,
2657 gen_helper_sve_revh_d,
2658 };
2659 return do_zpz_ool(s, a, fns[a->esz]);
2660}
2661
3a7be554 2662static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2663{
2664 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2665}
2666
3a7be554 2667static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2668{
2669 static gen_helper_gvec_3 * const fns[4] = {
2670 gen_helper_sve_rbit_b,
2671 gen_helper_sve_rbit_h,
2672 gen_helper_sve_rbit_s,
2673 gen_helper_sve_rbit_d,
2674 };
2675 return do_zpz_ool(s, a, fns[a->esz]);
2676}
2677
3a7be554 2678static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2679{
2680 if (sve_access_check(s)) {
36cbb7a8 2681 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
dd701faf 2682 a->rd, a->rn, a->rm, a->pg, a->esz);
b48ff240
RH
2683 }
2684 return true;
2685}
2686
757f9cff
RH
2687/*
2688 *** SVE Integer Compare - Vectors Group
2689 */
2690
2691static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2692 gen_helper_gvec_flags_4 *gen_fn)
2693{
2694 TCGv_ptr pd, zn, zm, pg;
2695 unsigned vsz;
2696 TCGv_i32 t;
2697
2698 if (gen_fn == NULL) {
2699 return false;
2700 }
2701 if (!sve_access_check(s)) {
2702 return true;
2703 }
2704
2705 vsz = vec_full_reg_size(s);
2706 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2707 pd = tcg_temp_new_ptr();
2708 zn = tcg_temp_new_ptr();
2709 zm = tcg_temp_new_ptr();
2710 pg = tcg_temp_new_ptr();
2711
2712 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2713 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2714 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2715 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2716
2717 gen_fn(t, pd, zn, zm, pg, t);
2718
2719 tcg_temp_free_ptr(pd);
2720 tcg_temp_free_ptr(zn);
2721 tcg_temp_free_ptr(zm);
2722 tcg_temp_free_ptr(pg);
2723
2724 do_pred_flags(t);
2725
2726 tcg_temp_free_i32(t);
2727 return true;
2728}
2729
2730#define DO_PPZZ(NAME, name) \
3a7be554 2731static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2732{ \
2733 static gen_helper_gvec_flags_4 * const fns[4] = { \
2734 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2735 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2736 }; \
2737 return do_ppzz_flags(s, a, fns[a->esz]); \
2738}
2739
2740DO_PPZZ(CMPEQ, cmpeq)
2741DO_PPZZ(CMPNE, cmpne)
2742DO_PPZZ(CMPGT, cmpgt)
2743DO_PPZZ(CMPGE, cmpge)
2744DO_PPZZ(CMPHI, cmphi)
2745DO_PPZZ(CMPHS, cmphs)
2746
2747#undef DO_PPZZ
2748
2749#define DO_PPZW(NAME, name) \
3a7be554 2750static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2751{ \
2752 static gen_helper_gvec_flags_4 * const fns[4] = { \
2753 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2754 gen_helper_sve_##name##_ppzw_s, NULL \
2755 }; \
2756 return do_ppzz_flags(s, a, fns[a->esz]); \
2757}
2758
2759DO_PPZW(CMPEQ, cmpeq)
2760DO_PPZW(CMPNE, cmpne)
2761DO_PPZW(CMPGT, cmpgt)
2762DO_PPZW(CMPGE, cmpge)
2763DO_PPZW(CMPHI, cmphi)
2764DO_PPZW(CMPHS, cmphs)
2765DO_PPZW(CMPLT, cmplt)
2766DO_PPZW(CMPLE, cmple)
2767DO_PPZW(CMPLO, cmplo)
2768DO_PPZW(CMPLS, cmpls)
2769
2770#undef DO_PPZW
2771
38cadeba
RH
2772/*
2773 *** SVE Integer Compare - Immediate Groups
2774 */
2775
2776static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2777 gen_helper_gvec_flags_3 *gen_fn)
2778{
2779 TCGv_ptr pd, zn, pg;
2780 unsigned vsz;
2781 TCGv_i32 t;
2782
2783 if (gen_fn == NULL) {
2784 return false;
2785 }
2786 if (!sve_access_check(s)) {
2787 return true;
2788 }
2789
2790 vsz = vec_full_reg_size(s);
2791 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2792 pd = tcg_temp_new_ptr();
2793 zn = tcg_temp_new_ptr();
2794 pg = tcg_temp_new_ptr();
2795
2796 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2797 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2798 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2799
2800 gen_fn(t, pd, zn, pg, t);
2801
2802 tcg_temp_free_ptr(pd);
2803 tcg_temp_free_ptr(zn);
2804 tcg_temp_free_ptr(pg);
2805
2806 do_pred_flags(t);
2807
2808 tcg_temp_free_i32(t);
2809 return true;
2810}
2811
2812#define DO_PPZI(NAME, name) \
3a7be554 2813static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
2814{ \
2815 static gen_helper_gvec_flags_3 * const fns[4] = { \
2816 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2817 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2818 }; \
2819 return do_ppzi_flags(s, a, fns[a->esz]); \
2820}
2821
2822DO_PPZI(CMPEQ, cmpeq)
2823DO_PPZI(CMPNE, cmpne)
2824DO_PPZI(CMPGT, cmpgt)
2825DO_PPZI(CMPGE, cmpge)
2826DO_PPZI(CMPHI, cmphi)
2827DO_PPZI(CMPHS, cmphs)
2828DO_PPZI(CMPLT, cmplt)
2829DO_PPZI(CMPLE, cmple)
2830DO_PPZI(CMPLO, cmplo)
2831DO_PPZI(CMPLS, cmpls)
2832
2833#undef DO_PPZI
2834
35da316f
RH
2835/*
2836 *** SVE Partition Break Group
2837 */
2838
2839static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2840 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2841{
2842 if (!sve_access_check(s)) {
2843 return true;
2844 }
2845
2846 unsigned vsz = pred_full_reg_size(s);
2847
2848 /* Predicate sizes may be smaller and cannot use simd_desc. */
2849 TCGv_ptr d = tcg_temp_new_ptr();
2850 TCGv_ptr n = tcg_temp_new_ptr();
2851 TCGv_ptr m = tcg_temp_new_ptr();
2852 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 2853 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2854
2855 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2856 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2857 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2858 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2859
2860 if (a->s) {
2861 fn_s(t, d, n, m, g, t);
2862 do_pred_flags(t);
2863 } else {
2864 fn(d, n, m, g, t);
2865 }
2866 tcg_temp_free_ptr(d);
2867 tcg_temp_free_ptr(n);
2868 tcg_temp_free_ptr(m);
2869 tcg_temp_free_ptr(g);
2870 tcg_temp_free_i32(t);
2871 return true;
2872}
2873
2874static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2875 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2876{
2877 if (!sve_access_check(s)) {
2878 return true;
2879 }
2880
2881 unsigned vsz = pred_full_reg_size(s);
2882
2883 /* Predicate sizes may be smaller and cannot use simd_desc. */
2884 TCGv_ptr d = tcg_temp_new_ptr();
2885 TCGv_ptr n = tcg_temp_new_ptr();
2886 TCGv_ptr g = tcg_temp_new_ptr();
04c774a2 2887 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
35da316f
RH
2888
2889 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2890 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2891 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2892
2893 if (a->s) {
2894 fn_s(t, d, n, g, t);
2895 do_pred_flags(t);
2896 } else {
2897 fn(d, n, g, t);
2898 }
2899 tcg_temp_free_ptr(d);
2900 tcg_temp_free_ptr(n);
2901 tcg_temp_free_ptr(g);
2902 tcg_temp_free_i32(t);
2903 return true;
2904}
2905
3a7be554 2906static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2907{
2908 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2909}
2910
3a7be554 2911static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2912{
2913 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2914}
2915
3a7be554 2916static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2917{
2918 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2919}
2920
3a7be554 2921static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2922{
2923 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2924}
2925
3a7be554 2926static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2927{
2928 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2929}
2930
3a7be554 2931static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2932{
2933 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2934}
2935
3a7be554 2936static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2937{
2938 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2939}
2940
9ee3a611
RH
2941/*
2942 *** SVE Predicate Count Group
2943 */
2944
2945static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2946{
2947 unsigned psz = pred_full_reg_size(s);
2948
2949 if (psz <= 8) {
2950 uint64_t psz_mask;
2951
2952 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2953 if (pn != pg) {
2954 TCGv_i64 g = tcg_temp_new_i64();
2955 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2956 tcg_gen_and_i64(val, val, g);
2957 tcg_temp_free_i64(g);
2958 }
2959
2960 /* Reduce the pred_esz_masks value simply to reduce the
2961 * size of the code generated here.
2962 */
2963 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2964 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2965
2966 tcg_gen_ctpop_i64(val, val);
2967 } else {
2968 TCGv_ptr t_pn = tcg_temp_new_ptr();
2969 TCGv_ptr t_pg = tcg_temp_new_ptr();
f556a201 2970 unsigned desc = 0;
9ee3a611
RH
2971 TCGv_i32 t_desc;
2972
f556a201
RH
2973 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
2974 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
9ee3a611
RH
2975
2976 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2977 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2978 t_desc = tcg_const_i32(desc);
2979
2980 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2981 tcg_temp_free_ptr(t_pn);
2982 tcg_temp_free_ptr(t_pg);
2983 tcg_temp_free_i32(t_desc);
2984 }
2985}
2986
3a7be554 2987static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2988{
2989 if (sve_access_check(s)) {
2990 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2991 }
2992 return true;
2993}
2994
3a7be554 2995static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
2996{
2997 if (sve_access_check(s)) {
2998 TCGv_i64 reg = cpu_reg(s, a->rd);
2999 TCGv_i64 val = tcg_temp_new_i64();
3000
3001 do_cntp(s, val, a->esz, a->pg, a->pg);
3002 if (a->d) {
3003 tcg_gen_sub_i64(reg, reg, val);
3004 } else {
3005 tcg_gen_add_i64(reg, reg, val);
3006 }
3007 tcg_temp_free_i64(val);
3008 }
3009 return true;
3010}
3011
3a7be554 3012static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3013{
3014 if (a->esz == 0) {
3015 return false;
3016 }
3017 if (sve_access_check(s)) {
3018 unsigned vsz = vec_full_reg_size(s);
3019 TCGv_i64 val = tcg_temp_new_i64();
3020 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3021
3022 do_cntp(s, val, a->esz, a->pg, a->pg);
3023 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3024 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3025 }
3026 return true;
3027}
3028
3a7be554 3029static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3030{
3031 if (sve_access_check(s)) {
3032 TCGv_i64 reg = cpu_reg(s, a->rd);
3033 TCGv_i64 val = tcg_temp_new_i64();
3034
3035 do_cntp(s, val, a->esz, a->pg, a->pg);
3036 do_sat_addsub_32(reg, val, a->u, a->d);
3037 }
3038 return true;
3039}
3040
3a7be554 3041static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3042{
3043 if (sve_access_check(s)) {
3044 TCGv_i64 reg = cpu_reg(s, a->rd);
3045 TCGv_i64 val = tcg_temp_new_i64();
3046
3047 do_cntp(s, val, a->esz, a->pg, a->pg);
3048 do_sat_addsub_64(reg, val, a->u, a->d);
3049 }
3050 return true;
3051}
3052
3a7be554 3053static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3054{
3055 if (a->esz == 0) {
3056 return false;
3057 }
3058 if (sve_access_check(s)) {
3059 TCGv_i64 val = tcg_temp_new_i64();
3060 do_cntp(s, val, a->esz, a->pg, a->pg);
3061 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3062 }
3063 return true;
3064}
3065
caf1cefc
RH
3066/*
3067 *** SVE Integer Compare Scalars Group
3068 */
3069
3a7be554 3070static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3071{
3072 if (!sve_access_check(s)) {
3073 return true;
3074 }
3075
3076 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3077 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3078 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3079 TCGv_i64 cmp = tcg_temp_new_i64();
3080
3081 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3082 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3083 tcg_temp_free_i64(cmp);
3084
3085 /* VF = !NF & !CF. */
3086 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3087 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3088
3089 /* Both NF and VF actually look at bit 31. */
3090 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3091 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3092 return true;
3093}
3094
3a7be554 3095static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3096{
bbd0968c 3097 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3098 TCGv_i32 t2, t3;
3099 TCGv_ptr ptr;
e610906c
RH
3100 unsigned vsz = vec_full_reg_size(s);
3101 unsigned desc = 0;
caf1cefc
RH
3102 TCGCond cond;
3103
bbd0968c
RH
3104 if (!sve_access_check(s)) {
3105 return true;
3106 }
3107
3108 op0 = read_cpu_reg(s, a->rn, 1);
3109 op1 = read_cpu_reg(s, a->rm, 1);
3110
caf1cefc
RH
3111 if (!a->sf) {
3112 if (a->u) {
3113 tcg_gen_ext32u_i64(op0, op0);
3114 tcg_gen_ext32u_i64(op1, op1);
3115 } else {
3116 tcg_gen_ext32s_i64(op0, op0);
3117 tcg_gen_ext32s_i64(op1, op1);
3118 }
3119 }
3120
3121 /* For the helper, compress the different conditions into a computation
3122 * of how many iterations for which the condition is true.
caf1cefc 3123 */
bbd0968c
RH
3124 t0 = tcg_temp_new_i64();
3125 t1 = tcg_temp_new_i64();
caf1cefc
RH
3126 tcg_gen_sub_i64(t0, op1, op0);
3127
bbd0968c 3128 tmax = tcg_const_i64(vsz >> a->esz);
caf1cefc
RH
3129 if (a->eq) {
3130 /* Equality means one more iteration. */
3131 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c
RH
3132
3133 /* If op1 is max (un)signed integer (and the only time the addition
3134 * above could overflow), then we produce an all-true predicate by
3135 * setting the count to the vector length. This is because the
3136 * pseudocode is described as an increment + compare loop, and the
3137 * max integer would always compare true.
3138 */
3139 tcg_gen_movi_i64(t1, (a->sf
3140 ? (a->u ? UINT64_MAX : INT64_MAX)
3141 : (a->u ? UINT32_MAX : INT32_MAX)));
3142 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3143 }
3144
bbd0968c
RH
3145 /* Bound to the maximum. */
3146 tcg_gen_umin_i64(t0, t0, tmax);
3147 tcg_temp_free_i64(tmax);
3148
3149 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3150 cond = (a->u
3151 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3152 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3153 tcg_gen_movi_i64(t1, 0);
3154 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3155 tcg_temp_free_i64(t1);
caf1cefc 3156
bbd0968c 3157 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3158 t2 = tcg_temp_new_i32();
3159 tcg_gen_extrl_i64_i32(t2, t0);
3160 tcg_temp_free_i64(t0);
bbd0968c
RH
3161
3162 /* Scale elements to bits. */
3163 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc 3164
e610906c
RH
3165 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3166 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
caf1cefc
RH
3167 t3 = tcg_const_i32(desc);
3168
3169 ptr = tcg_temp_new_ptr();
3170 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3171
3172 gen_helper_sve_while(t2, ptr, t2, t3);
3173 do_pred_flags(t2);
3174
3175 tcg_temp_free_ptr(ptr);
3176 tcg_temp_free_i32(t2);
3177 tcg_temp_free_i32(t3);
3178 return true;
3179}
3180
ed491961
RH
3181/*
3182 *** SVE Integer Wide Immediate - Unpredicated Group
3183 */
3184
3a7be554 3185static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3186{
3187 if (a->esz == 0) {
3188 return false;
3189 }
3190 if (sve_access_check(s)) {
3191 unsigned vsz = vec_full_reg_size(s);
3192 int dofs = vec_full_reg_offset(s, a->rd);
3193 uint64_t imm;
3194
3195 /* Decode the VFP immediate. */
3196 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3197 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3198 }
3199 return true;
3200}
3201
3a7be554 3202static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3203{
3a7be554 3204 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3205 return false;
3206 }
3207 if (sve_access_check(s)) {
3208 unsigned vsz = vec_full_reg_size(s);
3209 int dofs = vec_full_reg_offset(s, a->rd);
3210
8711e71f 3211 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3212 }
3213 return true;
3214}
3215
3a7be554 3216static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3217{
3a7be554 3218 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3219 return false;
3220 }
3221 if (sve_access_check(s)) {
3222 unsigned vsz = vec_full_reg_size(s);
3223 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3224 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3225 }
3226 return true;
3227}
3228
3a7be554 3229static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3230{
3231 a->imm = -a->imm;
3a7be554 3232 return trans_ADD_zzi(s, a);
6e6a157d
RH
3233}
3234
3a7be554 3235static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3236{
53229a77 3237 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3238 static const GVecGen2s op[4] = {
3239 { .fni8 = tcg_gen_vec_sub8_i64,
3240 .fniv = tcg_gen_sub_vec,
3241 .fno = gen_helper_sve_subri_b,
53229a77 3242 .opt_opc = vecop_list,
6e6a157d
RH
3243 .vece = MO_8,
3244 .scalar_first = true },
3245 { .fni8 = tcg_gen_vec_sub16_i64,
3246 .fniv = tcg_gen_sub_vec,
3247 .fno = gen_helper_sve_subri_h,
53229a77 3248 .opt_opc = vecop_list,
6e6a157d
RH
3249 .vece = MO_16,
3250 .scalar_first = true },
3251 { .fni4 = tcg_gen_sub_i32,
3252 .fniv = tcg_gen_sub_vec,
3253 .fno = gen_helper_sve_subri_s,
53229a77 3254 .opt_opc = vecop_list,
6e6a157d
RH
3255 .vece = MO_32,
3256 .scalar_first = true },
3257 { .fni8 = tcg_gen_sub_i64,
3258 .fniv = tcg_gen_sub_vec,
3259 .fno = gen_helper_sve_subri_d,
53229a77 3260 .opt_opc = vecop_list,
6e6a157d
RH
3261 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3262 .vece = MO_64,
3263 .scalar_first = true }
3264 };
3265
3a7be554 3266 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3267 return false;
3268 }
3269 if (sve_access_check(s)) {
3270 unsigned vsz = vec_full_reg_size(s);
3271 TCGv_i64 c = tcg_const_i64(a->imm);
3272 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3273 vec_full_reg_offset(s, a->rn),
3274 vsz, vsz, c, &op[a->esz]);
3275 tcg_temp_free_i64(c);
3276 }
3277 return true;
3278}
3279
3a7be554 3280static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3281{
3282 if (sve_access_check(s)) {
3283 unsigned vsz = vec_full_reg_size(s);
3284 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3285 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3286 }
3287 return true;
3288}
3289
3a7be554 3290static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3291{
3a7be554 3292 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3293 return false;
3294 }
3295 if (sve_access_check(s)) {
3296 TCGv_i64 val = tcg_const_i64(a->imm);
3297 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3298 tcg_temp_free_i64(val);
3299 }
3300 return true;
3301}
3302
3a7be554 3303static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3304{
3a7be554 3305 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3306}
3307
3a7be554 3308static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3309{
3a7be554 3310 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3311}
3312
3a7be554 3313static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3314{
3a7be554 3315 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3316}
3317
3a7be554 3318static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3319{
3a7be554 3320 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3321}
3322
3323static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3324{
3325 if (sve_access_check(s)) {
3326 unsigned vsz = vec_full_reg_size(s);
3327 TCGv_i64 c = tcg_const_i64(a->imm);
3328
3329 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3330 vec_full_reg_offset(s, a->rn),
3331 c, vsz, vsz, 0, fn);
3332 tcg_temp_free_i64(c);
3333 }
3334 return true;
3335}
3336
3337#define DO_ZZI(NAME, name) \
3a7be554 3338static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3339{ \
3340 static gen_helper_gvec_2i * const fns[4] = { \
3341 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3342 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3343 }; \
3344 return do_zzi_ool(s, a, fns[a->esz]); \
3345}
3346
3347DO_ZZI(SMAX, smax)
3348DO_ZZI(UMAX, umax)
3349DO_ZZI(SMIN, smin)
3350DO_ZZI(UMIN, umin)
3351
3352#undef DO_ZZI
3353
3a7be554 3354static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3355{
3356 static gen_helper_gvec_3 * const fns[2][2] = {
3357 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3358 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3359 };
3360
3361 if (sve_access_check(s)) {
e645d1a1 3362 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
d730ecaa
RH
3363 }
3364 return true;
3365}
3366
3a7be554 3367static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3368{
3369 static gen_helper_gvec_3 * const fns[2][2] = {
3370 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3371 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3372 };
3373
3374 if (sve_access_check(s)) {
e645d1a1 3375 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
16fcfdc7
RH
3376 }
3377 return true;
3378}
3379
3380
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */
3384
3a7be554 3385static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3386{
3387 static gen_helper_gvec_4_ptr * const fns[3] = {
3388 gen_helper_gvec_fmla_idx_h,
3389 gen_helper_gvec_fmla_idx_s,
3390 gen_helper_gvec_fmla_idx_d,
3391 };
3392
3393 if (sve_access_check(s)) {
3394 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3395 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3396 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3397 vec_full_reg_offset(s, a->rn),
3398 vec_full_reg_offset(s, a->rm),
3399 vec_full_reg_offset(s, a->ra),
3400 status, vsz, vsz, (a->index << 1) | a->sub,
3401 fns[a->esz - 1]);
3402 tcg_temp_free_ptr(status);
3403 }
3404 return true;
3405}
3406
/*
 *** SVE Floating Point Multiply Indexed Group
 */
3410
3a7be554 3411static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3412{
3413 static gen_helper_gvec_3_ptr * const fns[3] = {
3414 gen_helper_gvec_fmul_idx_h,
3415 gen_helper_gvec_fmul_idx_s,
3416 gen_helper_gvec_fmul_idx_d,
3417 };
3418
3419 if (sve_access_check(s)) {
3420 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3421 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3422 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3423 vec_full_reg_offset(s, a->rn),
3424 vec_full_reg_offset(s, a->rm),
3425 status, vsz, vsz, a->index, fns[a->esz - 1]);
3426 tcg_temp_free_ptr(status);
3427 }
3428 return true;
3429}
3430
/*
 *** SVE Floating Point Fast Reduction Group
 */
3434
3435typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3436 TCGv_ptr, TCGv_i32);
3437
3438static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3439 gen_helper_fp_reduce *fn)
3440{
3441 unsigned vsz = vec_full_reg_size(s);
3442 unsigned p2vsz = pow2ceil(vsz);
c648c9b7 3443 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
23fbe79f
RH
3444 TCGv_ptr t_zn, t_pg, status;
3445 TCGv_i64 temp;
3446
3447 temp = tcg_temp_new_i64();
3448 t_zn = tcg_temp_new_ptr();
3449 t_pg = tcg_temp_new_ptr();
3450
3451 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3452 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3453 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3454
3455 fn(temp, t_zn, t_pg, status, t_desc);
3456 tcg_temp_free_ptr(t_zn);
3457 tcg_temp_free_ptr(t_pg);
3458 tcg_temp_free_ptr(status);
3459 tcg_temp_free_i32(t_desc);
3460
3461 write_fp_dreg(s, a->rd, temp);
3462 tcg_temp_free_i64(temp);
3463}
3464
3465#define DO_VPZ(NAME, name) \
3a7be554 3466static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3467{ \
3468 static gen_helper_fp_reduce * const fns[3] = { \
3469 gen_helper_sve_##name##_h, \
3470 gen_helper_sve_##name##_s, \
3471 gen_helper_sve_##name##_d, \
3472 }; \
3473 if (a->esz == 0) { \
3474 return false; \
3475 } \
3476 if (sve_access_check(s)) { \
3477 do_reduce(s, a, fns[a->esz - 1]); \
3478 } \
3479 return true; \
3480}
3481
3482DO_VPZ(FADDV, faddv)
3483DO_VPZ(FMINNMV, fminnmv)
3484DO_VPZ(FMAXNMV, fmaxnmv)
3485DO_VPZ(FMINV, fminv)
3486DO_VPZ(FMAXV, fmaxv)
3487
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */
3491
3492static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3493{
3494 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3495 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3496
3497 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3498 vec_full_reg_offset(s, a->rn),
3499 status, vsz, vsz, 0, fn);
3500 tcg_temp_free_ptr(status);
3501}
3502
3a7be554 3503static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3504{
3505 static gen_helper_gvec_2_ptr * const fns[3] = {
3506 gen_helper_gvec_frecpe_h,
3507 gen_helper_gvec_frecpe_s,
3508 gen_helper_gvec_frecpe_d,
3509 };
3510 if (a->esz == 0) {
3511 return false;
3512 }
3513 if (sve_access_check(s)) {
3514 do_zz_fp(s, a, fns[a->esz - 1]);
3515 }
3516 return true;
3517}
3518
3a7be554 3519static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3520{
3521 static gen_helper_gvec_2_ptr * const fns[3] = {
3522 gen_helper_gvec_frsqrte_h,
3523 gen_helper_gvec_frsqrte_s,
3524 gen_helper_gvec_frsqrte_d,
3525 };
3526 if (a->esz == 0) {
3527 return false;
3528 }
3529 if (sve_access_check(s)) {
3530 do_zz_fp(s, a, fns[a->esz - 1]);
3531 }
3532 return true;
3533}
3534
/*
 *** SVE Floating Point Compare with Zero Group
 */
3538
3539static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3540 gen_helper_gvec_3_ptr *fn)
3541{
3542 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3543 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3544
3545 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3546 vec_full_reg_offset(s, a->rn),
3547 pred_full_reg_offset(s, a->pg),
3548 status, vsz, vsz, 0, fn);
3549 tcg_temp_free_ptr(status);
3550}
3551
3552#define DO_PPZ(NAME, name) \
3a7be554 3553static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3554{ \
3555 static gen_helper_gvec_3_ptr * const fns[3] = { \
3556 gen_helper_sve_##name##_h, \
3557 gen_helper_sve_##name##_s, \
3558 gen_helper_sve_##name##_d, \
3559 }; \
3560 if (a->esz == 0) { \
3561 return false; \
3562 } \
3563 if (sve_access_check(s)) { \
3564 do_ppz_fp(s, a, fns[a->esz - 1]); \
3565 } \
3566 return true; \
3567}
3568
3569DO_PPZ(FCMGE_ppz0, fcmge0)
3570DO_PPZ(FCMGT_ppz0, fcmgt0)
3571DO_PPZ(FCMLE_ppz0, fcmle0)
3572DO_PPZ(FCMLT_ppz0, fcmlt0)
3573DO_PPZ(FCMEQ_ppz0, fcmeq0)
3574DO_PPZ(FCMNE_ppz0, fcmne0)
3575
3576#undef DO_PPZ
3577
/*
 *** SVE floating-point trig multiply-add coefficient
 */
3581
3a7be554 3582static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3583{
3584 static gen_helper_gvec_3_ptr * const fns[3] = {
3585 gen_helper_sve_ftmad_h,
3586 gen_helper_sve_ftmad_s,
3587 gen_helper_sve_ftmad_d,
3588 };
3589
3590 if (a->esz == 0) {
3591 return false;
3592 }
3593 if (sve_access_check(s)) {
3594 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3595 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3596 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3597 vec_full_reg_offset(s, a->rn),
3598 vec_full_reg_offset(s, a->rm),
3599 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3600 tcg_temp_free_ptr(status);
3601 }
3602 return true;
3603}
3604
/*
 *** SVE Floating Point Accumulating Reduction Group
 */
3608
3a7be554 3609static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3610{
3611 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3612 TCGv_ptr, TCGv_ptr, TCGv_i32);
3613 static fadda_fn * const fns[3] = {
3614 gen_helper_sve_fadda_h,
3615 gen_helper_sve_fadda_s,
3616 gen_helper_sve_fadda_d,
3617 };
3618 unsigned vsz = vec_full_reg_size(s);
3619 TCGv_ptr t_rm, t_pg, t_fpst;
3620 TCGv_i64 t_val;
3621 TCGv_i32 t_desc;
3622
3623 if (a->esz == 0) {
3624 return false;
3625 }
3626 if (!sve_access_check(s)) {
3627 return true;
3628 }
3629
3630 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3631 t_rm = tcg_temp_new_ptr();
3632 t_pg = tcg_temp_new_ptr();
3633 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3634 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3635 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
3636 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3637
3638 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3639
3640 tcg_temp_free_i32(t_desc);
3641 tcg_temp_free_ptr(t_fpst);
3642 tcg_temp_free_ptr(t_pg);
3643 tcg_temp_free_ptr(t_rm);
3644
3645 write_fp_dreg(s, a->rd, t_val);
3646 tcg_temp_free_i64(t_val);
3647 return true;
3648}
3649
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */
3653
3654static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3655 gen_helper_gvec_3_ptr *fn)
3656{
3657 if (fn == NULL) {
3658 return false;
3659 }
3660 if (sve_access_check(s)) {
3661 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3662 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3663 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3664 vec_full_reg_offset(s, a->rn),
3665 vec_full_reg_offset(s, a->rm),
3666 status, vsz, vsz, 0, fn);
3667 tcg_temp_free_ptr(status);
3668 }
3669 return true;
3670}
3671
3672
3673#define DO_FP3(NAME, name) \
3a7be554 3674static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3675{ \
3676 static gen_helper_gvec_3_ptr * const fns[4] = { \
3677 NULL, gen_helper_gvec_##name##_h, \
3678 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3679 }; \
3680 return do_zzz_fp(s, a, fns[a->esz]); \
3681}
3682
3683DO_FP3(FADD_zzz, fadd)
3684DO_FP3(FSUB_zzz, fsub)
3685DO_FP3(FMUL_zzz, fmul)
3686DO_FP3(FTSMUL, ftsmul)
3687DO_FP3(FRECPS, recps)
3688DO_FP3(FRSQRTS, rsqrts)
3689
3690#undef DO_FP3
3691
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */
3695
3696static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3697 gen_helper_gvec_4_ptr *fn)
3698{
3699 if (fn == NULL) {
3700 return false;
3701 }
3702 if (sve_access_check(s)) {
3703 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3704 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3705 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3706 vec_full_reg_offset(s, a->rn),
3707 vec_full_reg_offset(s, a->rm),
3708 pred_full_reg_offset(s, a->pg),
3709 status, vsz, vsz, 0, fn);
3710 tcg_temp_free_ptr(status);
3711 }
3712 return true;
3713}
3714
3715#define DO_FP3(NAME, name) \
3a7be554 3716static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3717{ \
3718 static gen_helper_gvec_4_ptr * const fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3721 }; \
3722 return do_zpzz_fp(s, a, fns[a->esz]); \
3723}
3724
3725DO_FP3(FADD_zpzz, fadd)
3726DO_FP3(FSUB_zpzz, fsub)
3727DO_FP3(FMUL_zpzz, fmul)
3728DO_FP3(FMIN_zpzz, fmin)
3729DO_FP3(FMAX_zpzz, fmax)
3730DO_FP3(FMINNM_zpzz, fminnum)
3731DO_FP3(FMAXNM_zpzz, fmaxnum)
3732DO_FP3(FABD, fabd)
3733DO_FP3(FSCALE, fscalbn)
3734DO_FP3(FDIV, fdiv)
3735DO_FP3(FMULX, fmulx)
3736
3737#undef DO_FP3
8092c6a3 3738
cc48affe
RH
3739typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3740 TCGv_i64, TCGv_ptr, TCGv_i32);
3741
3742static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3743 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3744{
3745 unsigned vsz = vec_full_reg_size(s);
3746 TCGv_ptr t_zd, t_zn, t_pg, status;
3747 TCGv_i32 desc;
3748
3749 t_zd = tcg_temp_new_ptr();
3750 t_zn = tcg_temp_new_ptr();
3751 t_pg = tcg_temp_new_ptr();
3752 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3753 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3754 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3755
cdfb22bb 3756 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
3757 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3758 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3759
3760 tcg_temp_free_i32(desc);
3761 tcg_temp_free_ptr(status);
3762 tcg_temp_free_ptr(t_pg);
3763 tcg_temp_free_ptr(t_zn);
3764 tcg_temp_free_ptr(t_zd);
3765}
3766
3767static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3768 gen_helper_sve_fp2scalar *fn)
3769{
3770 TCGv_i64 temp = tcg_const_i64(imm);
3771 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3772 tcg_temp_free_i64(temp);
3773}
3774
3775#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3776static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3777{ \
3778 static gen_helper_sve_fp2scalar * const fns[3] = { \
3779 gen_helper_sve_##name##_h, \
3780 gen_helper_sve_##name##_s, \
3781 gen_helper_sve_##name##_d \
3782 }; \
3783 static uint64_t const val[3][2] = { \
3784 { float16_##const0, float16_##const1 }, \
3785 { float32_##const0, float32_##const1 }, \
3786 { float64_##const0, float64_##const1 }, \
3787 }; \
3788 if (a->esz == 0) { \
3789 return false; \
3790 } \
3791 if (sve_access_check(s)) { \
3792 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3793 } \
3794 return true; \
3795}
3796
cc48affe
RH
3797DO_FP_IMM(FADD, fadds, half, one)
3798DO_FP_IMM(FSUB, fsubs, half, one)
3799DO_FP_IMM(FMUL, fmuls, half, two)
3800DO_FP_IMM(FSUBR, fsubrs, half, one)
3801DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3802DO_FP_IMM(FMINNM, fminnms, zero, one)
3803DO_FP_IMM(FMAX, fmaxs, zero, one)
3804DO_FP_IMM(FMIN, fmins, zero, one)
3805
3806#undef DO_FP_IMM
3807
abfdefd5
RH
3808static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3809 gen_helper_gvec_4_ptr *fn)
3810{
3811 if (fn == NULL) {
3812 return false;
3813 }
3814 if (sve_access_check(s)) {
3815 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3816 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3817 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn),
3819 vec_full_reg_offset(s, a->rm),
3820 pred_full_reg_offset(s, a->pg),
3821 status, vsz, vsz, 0, fn);
3822 tcg_temp_free_ptr(status);
3823 }
3824 return true;
3825}
3826
3827#define DO_FPCMP(NAME, name) \
3a7be554 3828static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3829{ \
3830 static gen_helper_gvec_4_ptr * const fns[4] = { \
3831 NULL, gen_helper_sve_##name##_h, \
3832 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3833 }; \
3834 return do_fp_cmp(s, a, fns[a->esz]); \
3835}
3836
3837DO_FPCMP(FCMGE, fcmge)
3838DO_FPCMP(FCMGT, fcmgt)
3839DO_FPCMP(FCMEQ, fcmeq)
3840DO_FPCMP(FCMNE, fcmne)
3841DO_FPCMP(FCMUO, fcmuo)
3842DO_FPCMP(FACGE, facge)
3843DO_FPCMP(FACGT, facgt)
3844
3845#undef DO_FPCMP
3846
3a7be554 3847static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3848{
3849 static gen_helper_gvec_4_ptr * const fns[3] = {
3850 gen_helper_sve_fcadd_h,
3851 gen_helper_sve_fcadd_s,
3852 gen_helper_sve_fcadd_d
3853 };
3854
3855 if (a->esz == 0) {
3856 return false;
3857 }
3858 if (sve_access_check(s)) {
3859 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3860 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3861 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3862 vec_full_reg_offset(s, a->rn),
3863 vec_full_reg_offset(s, a->rm),
3864 pred_full_reg_offset(s, a->pg),
3865 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3866 tcg_temp_free_ptr(status);
3867 }
3868 return true;
3869}
3870
08975da9
RH
3871static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3872 gen_helper_gvec_5_ptr *fn)
6ceabaad 3873{
08975da9 3874 if (a->esz == 0) {
6ceabaad
RH
3875 return false;
3876 }
08975da9
RH
3877 if (sve_access_check(s)) {
3878 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3879 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3880 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3881 vec_full_reg_offset(s, a->rn),
3882 vec_full_reg_offset(s, a->rm),
3883 vec_full_reg_offset(s, a->ra),
3884 pred_full_reg_offset(s, a->pg),
3885 status, vsz, vsz, 0, fn);
3886 tcg_temp_free_ptr(status);
6ceabaad 3887 }
6ceabaad
RH
3888 return true;
3889}
3890
3891#define DO_FMLA(NAME, name) \
3a7be554 3892static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 3893{ \
08975da9 3894 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
3895 NULL, gen_helper_sve_##name##_h, \
3896 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3897 }; \
3898 return do_fmla(s, a, fns[a->esz]); \
3899}
3900
3901DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3902DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3903DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3904DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3905
3906#undef DO_FMLA
3907
3a7be554 3908static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 3909{
08975da9
RH
3910 static gen_helper_gvec_5_ptr * const fns[4] = {
3911 NULL,
05f48bab
RH
3912 gen_helper_sve_fcmla_zpzzz_h,
3913 gen_helper_sve_fcmla_zpzzz_s,
3914 gen_helper_sve_fcmla_zpzzz_d,
3915 };
3916
3917 if (a->esz == 0) {
3918 return false;
3919 }
3920 if (sve_access_check(s)) {
3921 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3922 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3923 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3924 vec_full_reg_offset(s, a->rn),
3925 vec_full_reg_offset(s, a->rm),
3926 vec_full_reg_offset(s, a->ra),
3927 pred_full_reg_offset(s, a->pg),
3928 status, vsz, vsz, a->rot, fns[a->esz]);
3929 tcg_temp_free_ptr(status);
05f48bab
RH
3930 }
3931 return true;
3932}
3933
3a7be554 3934static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
3935{
3936 static gen_helper_gvec_3_ptr * const fns[2] = {
3937 gen_helper_gvec_fcmlah_idx,
3938 gen_helper_gvec_fcmlas_idx,
3939 };
3940
3941 tcg_debug_assert(a->esz == 1 || a->esz == 2);
3942 tcg_debug_assert(a->rd == a->ra);
3943 if (sve_access_check(s)) {
3944 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3945 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
3946 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3947 vec_full_reg_offset(s, a->rn),
3948 vec_full_reg_offset(s, a->rm),
3949 status, vsz, vsz,
3950 a->index * 4 + a->rot,
3951 fns[a->esz - 1]);
3952 tcg_temp_free_ptr(status);
3953 }
3954 return true;
3955}
3956
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */
3960
3961static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3962 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3963{
3964 if (sve_access_check(s)) {
3965 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3966 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
3967 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3968 vec_full_reg_offset(s, rn),
3969 pred_full_reg_offset(s, pg),
3970 status, vsz, vsz, 0, fn);
3971 tcg_temp_free_ptr(status);
3972 }
3973 return true;
3974}
3975
3a7be554 3976static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3977{
e4ab5124 3978 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
3979}
3980
3a7be554 3981static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3982{
3983 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3984}
3985
3a7be554 3986static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3987{
e4ab5124 3988 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
3989}
3990
3a7be554 3991static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3992{
3993 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
3994}
3995
3a7be554 3996static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3997{
3998 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
3999}
4000
3a7be554 4001static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4002{
4003 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4004}
4005
3a7be554 4006static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4007{
4008 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4009}
4010
3a7be554 4011static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4012{
4013 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4014}
4015
3a7be554 4016static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4017{
4018 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4019}
4020
3a7be554 4021static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4022{
4023 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4024}
4025
3a7be554 4026static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4027{
4028 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4029}
4030
3a7be554 4031static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4032{
4033 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4034}
4035
3a7be554 4036static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4037{
4038 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4039}
4040
3a7be554 4041static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4042{
4043 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4044}
4045
3a7be554 4046static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4047{
4048 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4049}
4050
3a7be554 4051static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4052{
4053 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4054}
4055
3a7be554 4056static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4057{
4058 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4059}
4060
3a7be554 4061static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4062{
4063 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4064}
4065
3a7be554 4066static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4067{
4068 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4069}
4070
3a7be554 4071static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4072{
4073 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4074}
4075
cda3c753
RH
4076static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4077 gen_helper_sve_frint_h,
4078 gen_helper_sve_frint_s,
4079 gen_helper_sve_frint_d
4080};
4081
3a7be554 4082static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4083{
4084 if (a->esz == 0) {
4085 return false;
4086 }
4087 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4088 frint_fns[a->esz - 1]);
4089}
4090
3a7be554 4091static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4092{
4093 static gen_helper_gvec_3_ptr * const fns[3] = {
4094 gen_helper_sve_frintx_h,
4095 gen_helper_sve_frintx_s,
4096 gen_helper_sve_frintx_d
4097 };
4098 if (a->esz == 0) {
4099 return false;
4100 }
4101 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4102}
4103
4104static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4105{
4106 if (a->esz == 0) {
4107 return false;
4108 }
4109 if (sve_access_check(s)) {
4110 unsigned vsz = vec_full_reg_size(s);
4111 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4112 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4113
4114 gen_helper_set_rmode(tmode, tmode, status);
4115
4116 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4117 vec_full_reg_offset(s, a->rn),
4118 pred_full_reg_offset(s, a->pg),
4119 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4120
4121 gen_helper_set_rmode(tmode, tmode, status);
4122 tcg_temp_free_i32(tmode);
4123 tcg_temp_free_ptr(status);
4124 }
4125 return true;
4126}
4127
3a7be554 4128static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4129{
4130 return do_frint_mode(s, a, float_round_nearest_even);
4131}
4132
3a7be554 4133static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4134{
4135 return do_frint_mode(s, a, float_round_up);
4136}
4137
3a7be554 4138static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4139{
4140 return do_frint_mode(s, a, float_round_down);
4141}
4142
3a7be554 4143static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4144{
4145 return do_frint_mode(s, a, float_round_to_zero);
4146}
4147
3a7be554 4148static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4149{
4150 return do_frint_mode(s, a, float_round_ties_away);
4151}
4152
3a7be554 4153static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4154{
4155 static gen_helper_gvec_3_ptr * const fns[3] = {
4156 gen_helper_sve_frecpx_h,
4157 gen_helper_sve_frecpx_s,
4158 gen_helper_sve_frecpx_d
4159 };
4160 if (a->esz == 0) {
4161 return false;
4162 }
4163 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4164}
4165
3a7be554 4166static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4167{
4168 static gen_helper_gvec_3_ptr * const fns[3] = {
4169 gen_helper_sve_fsqrt_h,
4170 gen_helper_sve_fsqrt_s,
4171 gen_helper_sve_fsqrt_d
4172 };
4173 if (a->esz == 0) {
4174 return false;
4175 }
4176 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4177}
4178
3a7be554 4179static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4180{
4181 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4182}
4183
3a7be554 4184static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4185{
4186 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4187}
4188
3a7be554 4189static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4190{
4191 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4192}
4193
3a7be554 4194static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4195{
4196 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4197}
4198
3a7be554 4199static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4200{
4201 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4202}
4203
3a7be554 4204static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4205{
4206 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4207}
4208
3a7be554 4209static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4210{
4211 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4212}
4213
3a7be554 4214static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4215{
4216 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4217}
4218
3a7be554 4219static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4220{
4221 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4222}
4223
3a7be554 4224static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4225{
4226 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4227}
4228
3a7be554 4229static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4230{
4231 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4232}
4233
3a7be554 4234static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4235{
4236 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4237}
4238
3a7be554 4239static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4240{
4241 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4242}
4243
3a7be554 4244static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4245{
4246 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4247}
4248
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
4252
4253/* Subroutine loading a vector register at VOFS of LEN bytes.
4254 * The load should begin at the address Rn + IMM.
 *
 * VOFS is used as a byte offset from cpu_env for the destination.
 * The address Rn + IMM is passed through gen_mte_checkN before use.
 * Whole 8-byte units are transferred first: inline when at most four
 * memory operations are needed in total, otherwise via a generated loop.
 * A remaining 2, 4 or 6 bytes are then loaded and stored as one final
 * 64-bit unit.
4255 */
4256
19f2acc9 4257static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4258{
19f2acc9
RH
/*
 * len_align: bytes covered by whole 8-byte units.
 * len_remain: trailing bytes (len % 8).
 * nparts: number of memory operations an inline expansion needs; the
 * population count of the remainder gives 1 for 2 or 4 bytes and 2 for
 * 6 bytes, matching the remainder switch at the end of the function.
 */
4259 int len_align = QEMU_ALIGN_DOWN(len, 8);
4260 int len_remain = len % 8;
4261 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4262 int midx = get_mem_index(s);
b2aa8879 4263 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4264
b2aa8879
RH
/* Form Rn + IMM, then obtain the address actually used for the accesses. */
4265 dirty_addr = tcg_temp_new_i64();
4266 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4267 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
b2aa8879 4268 tcg_temp_free_i64(dirty_addr);
d1822297 4269
b2aa8879
RH
4270 /*
4271 * Note that unpredicated load/store of vector/predicate registers
d1822297 4272 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4273 * operations on larger quantities.
d1822297
RH
4274 * Attempt to keep code expansion to a minimum by limiting the
4275 * amount of unrolling done.
4276 */
4277 if (nparts <= 4) {
4278 int i;
4279
/* Inline expansion: one 64-bit load and one store into cpu_env per unit. */
b2aa8879 4280 t0 = tcg_temp_new_i64();
d1822297 4281 for (i = 0; i < len_align; i += 8) {
b2aa8879 4282 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4283 tcg_gen_st_i64(t0, cpu_env, vofs + i);
d8227b09 4284 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4285 }
b2aa8879 4286 tcg_temp_free_i64(t0);
d1822297
RH
4287 } else {
/*
 * Generated loop: i is a local temp holding the running byte offset
 * into the register; the branch at the bottom repeats the body while
 * i is (unsigned) below len_align.
 */
4288 TCGLabel *loop = gen_new_label();
4289 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4290
b2aa8879
RH
4291 /* Copy the clean address into a local temp, live across the loop. */
4292 t0 = clean_addr;
4b4dc975 4293 clean_addr = new_tmp_a64_local(s);
b2aa8879 4294 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4295
b2aa8879 4296 gen_set_label(loop);
d1822297 4297
b2aa8879
RH
4298 t0 = tcg_temp_new_i64();
4299 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4300 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4301
/* tp = cpu_env + i is computed before i is advanced; the store adds vofs. */
b2aa8879 4302 tp = tcg_temp_new_ptr();
d1822297
RH
4303 tcg_gen_add_ptr(tp, cpu_env, i);
4304 tcg_gen_addi_ptr(i, i, 8);
4305 tcg_gen_st_i64(t0, tp, vofs);
4306 tcg_temp_free_ptr(tp);
b2aa8879 4307 tcg_temp_free_i64(t0);
d1822297
RH
4308
4309 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4310 tcg_temp_free_ptr(i);
4311 }
4312
b2aa8879
RH
4313 /*
4314 * Predicate register loads can be any multiple of 2.
d1822297
RH
4315 * Note that we still store the entire 64-bit unit into cpu_env.
4316 */
4317 if (len_remain) {
b2aa8879 4318 t0 = tcg_temp_new_i64();
d1822297
RH
4319 switch (len_remain) {
4320 case 2:
4321 case 4:
4322 case 8:
/*
 * Single access whose size is log2 of the remainder.  len_remain is
 * len % 8, so for non-negative len the "8" label cannot be reached.
 */
b2aa8879
RH
4323 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4324 MO_LE | ctz32(len_remain));
d1822297
RH
4325 break;
4326
4327 case 6:
/*
 * 6 bytes: a 32-bit load into t0, then a 16-bit load from 4 bytes
 * further on into t1, which is deposited into t0 starting at bit 32.
 */
4328 t1 = tcg_temp_new_i64();
b2aa8879
RH
4329 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4330 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4331 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4332 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4333 tcg_temp_free_i64(t1);
4334 break;
4335
4336 default:
4337 g_assert_not_reached();
4338 }
4339 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4340 tcg_temp_free_i64(t0);
d1822297 4341 }
d1822297
RH
4342}
4343
5047c204 4344/* Similarly for stores. */
/*
 * Store LEN bytes of the register found at byte offset VOFS from cpu_env
 * to the address Rn + IMM.  The address is passed through gen_mte_checkN
 * before use.  Whole 8-byte units are transferred first (inline when at
 * most four memory operations are needed in total, otherwise via a
 * generated loop), followed by a 2, 4 or 6 byte remainder.
 */
19f2acc9 4345static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4346{
19f2acc9
RH
/*
 * len_align: bytes covered by whole 8-byte units.
 * len_remain: trailing bytes (len % 8).
 * nparts: number of memory operations an inline expansion needs; the
 * population count of the remainder gives 1 for 2 or 4 bytes and 2 for
 * 6 bytes, matching the remainder switch at the end of the function.
 */
4347 int len_align = QEMU_ALIGN_DOWN(len, 8);
4348 int len_remain = len % 8;
4349 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4350 int midx = get_mem_index(s);
bba87d0a 4351 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4352
bba87d0a
RH
/* Form Rn + IMM, then obtain the address actually used for the accesses. */
4353 dirty_addr = tcg_temp_new_i64();
4354 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
33e74c31 4355 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
bba87d0a 4356 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4357
4358 /* Note that unpredicated load/store of vector/predicate registers
4359 * are defined as a stream of bytes, which equates to little-endian
4360 * operations on larger quantities. There is no nice way to force
4361 * a little-endian store for aarch64_be-linux-user out of line.
4362 *
4363 * Attempt to keep code expansion to a minimum by limiting the
4364 * amount of unrolling done.
4365 */
4366 if (nparts <= 4) {
4367 int i;
4368
/* Inline expansion: one 64-bit read from cpu_env and one store per unit. */
bba87d0a 4369 t0 = tcg_temp_new_i64();
5047c204
RH
4370 for (i = 0; i < len_align; i += 8) {
4371 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a 4372 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
d8227b09 4373 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4374 }
bba87d0a 4375 tcg_temp_free_i64(t0);
5047c204
RH
4376 } else {
/*
 * Generated loop: i is a local temp holding the running byte offset
 * into the register; the branch at the bottom repeats the body while
 * i is (unsigned) below len_align.
 */
4377 TCGLabel *loop = gen_new_label();
bba87d0a 4378 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4379
bba87d0a
RH
4380 /* Copy the clean address into a local temp, live across the loop. */
4381 t0 = clean_addr;
4b4dc975 4382 clean_addr = new_tmp_a64_local(s);
bba87d0a 4383 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4384
bba87d0a 4385 gen_set_label(loop);
5047c204 4386
bba87d0a
RH
/* Read the unit at cpu_env + i + vofs before advancing i. */
4387 t0 = tcg_temp_new_i64();
4388 tp = tcg_temp_new_ptr();
4389 tcg_gen_add_ptr(tp, cpu_env, i);
4390 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4391 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4392 tcg_temp_free_ptr(tp);
4393
4394 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4395 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4396 tcg_temp_free_i64(t0);
5047c204
RH
4397
4398 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4399 tcg_temp_free_ptr(i);
4400 }
4401
4402 /* Predicate register stores can be any multiple of 2. */
4403 if (len_remain) {
/* The whole trailing 64-bit unit is read; only len_remain bytes are stored. */
bba87d0a 4404 t0 = tcg_temp_new_i64();
5047c204 4405 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4406
4407 switch (len_remain) {
4408 case 2:
4409 case 4:
4410 case 8:
/*
 * Single access whose size is log2 of the remainder.  len_remain is
 * len % 8, so for non-negative len the "8" label cannot be reached.
 */
bba87d0a
RH
4411 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4412 MO_LE | ctz32(len_remain));
5047c204
RH
4413 break;
4414
4415 case 6:
/*
 * 6 bytes: store the low 32 bits, then shift the value right by 32
 * and store the next 16 bits 4 bytes further on.
 */
bba87d0a
RH
4416 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4417 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4418 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4419 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4420 break;
4421
4422 default:
4423 g_assert_not_reached();
4424 }
bba87d0a 4425 tcg_temp_free_i64(t0);
5047c204 4426 }
5047c204
RH
4427}
4428
/*
 * Translator entry for LDR_zri: load a full vector register.
 * When sve_access_check() passes, calls do_ldr() for register rd with
 * the full vector register size, base register rn and a byte offset of
 * imm multiplied by that size.  Always returns true.
 */
3a7be554 4429static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4430{
4431 if (sve_access_check(s)) {
4432 int size = vec_full_reg_size(s);
4433 int off = vec_full_reg_offset(s, a->rd);
4434 do_ldr(s, off, size, a->rn, a->imm * size);
4435 }
4436 return true;
4437}
4438
/*
 * Translator entry for LDR_pri: load a full predicate register.
 * When sve_access_check() passes, calls do_ldr() for predicate rd with
 * the full predicate register size, base register rn and a byte offset
 * of imm multiplied by that size.  Always returns true.
 */
3a7be554 4439static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4440{
4441 if (sve_access_check(s)) {
4442 int size = pred_full_reg_size(s);
4443 int off = pred_full_reg_offset(s, a->rd);
4444 do_ldr(s, off, size, a->rn, a->imm * size);
4445 }
4446 return true;
4447}
c4e7c493 4448
/*
 * Translator entry for STR_zri: store a full vector register.
 * When sve_access_check() passes, calls do_str() for register rd with
 * the full vector register size, base register rn and a byte offset of
 * imm multiplied by that size.  Always returns true.
 */
3a7be554 4449static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4450{
4451 if (sve_access_check(s)) {
4452 int size = vec_full_reg_size(s);
4453 int off = vec_full_reg_offset(s, a->rd);
4454 do_str(s, off, size, a->rn, a->imm * size);
4455 }
4456 return true;
4457}
4458
/*
 * Translator entry for STR_pri: store a full predicate register.
 * When sve_access_check() passes, calls do_str() for predicate rd with
 * the full predicate register size, base register rn and a byte offset
 * of imm multiplied by that size.  Always returns true.
 */
3a7be554 4459static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4460{
4461 if (sve_access_check(s)) {
4462 int size = pred_full_reg_size(s);
4463 int off = pred_full_reg_offset(s, a->rd);
4464 do_str(s, off, size, a->rn, a->imm * size);
4465 }
4466 return true;
4467}
4468
c4e7c493
RH
4469/*
4470 *** SVE Memory - Contiguous Load Group
4471 */
4472
4473/* The memory mode of the dtype. */
/*
 * Indexed by the 4-bit dtype field.  Each entry gives the memory access
 * size and whether the value is sign-extended (MO_S*) or zero-extended
 * (MO_U*); MO_Q is the full 64-bit access.
 */
14776ab5 4474static const MemOp dtype_mop[16] = {
c4e7c493
RH
4475 MO_UB, MO_UB, MO_UB, MO_UB,
4476 MO_SL, MO_UW, MO_UW, MO_UW,
4477 MO_SW, MO_SW, MO_UL, MO_UL,
4478 MO_SB, MO_SB, MO_SB, MO_Q
4479};
4480
/* Size component only (sign information masked off) of dtype_mop[x]. */
4481#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4482
4483/* The vector element size of dtype. */
/*
 * Indexed by the same dtype field as dtype_mop.  Callers in this file
 * use the value as a shift count (e.g. vsz >> dtype_esz[dtype] for the
 * number of elements), i.e. it is the log2 of the element size in bytes.
 */
4484static const uint8_t dtype_esz[16] = {
4485 0, 1, 2, 3,
4486 3, 1, 2, 3,
4487 3, 2, 2, 3,
4488 3, 2, 1, 3
4489};
4490
/*
 * Common tail for the predicated contiguous load/store translators:
 * build the descriptor and call the out-of-line helper FN.
 *
 *   zt       - register number, encoded into the descriptor data field
 *   pg       - governing predicate register number
 *   addr     - base address of the access
 *   dtype    - index into dtype_mop, used for the memory element size
 *   mte_n    - multiplier for the MTE access size: SIZEM1 is set to
 *              (mte_n << msz) - 1, so this must be a count of at least 1
 *   is_write - recorded in the MTE descriptor WRITE field
 *   fn       - helper invoked as fn(cpu_env, pg pointer, addr, desc)
 */
4491static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4492 int dtype, uint32_t mte_n, bool is_write,
4493 gen_helper_gvec_mem *fn)
c4e7c493
RH
4494{
4495 unsigned vsz = vec_full_reg_size(s);
4496 TCGv_ptr t_pg;
500d0484 4497 TCGv_i32 t_desc;
206adacf 4498 int desc = 0;
c4e7c493 4499
206adacf
RH
4500 /*
4501 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4502 * registers as pointers, so encode the regno into the data field.
4503 * For consistency, do this even for LD1.
4504 */
9473d0ec 4505 if (s->mte_active[0]) {
206adacf
RH
/*
 * MTE active: pack the MTE parameters and shift them above the bits
 * later used for zt.  The address is passed to the helper unmodified.
 */
4506 int msz = dtype_msz(dtype);
4507
4508 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4509 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4510 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4511 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 4512 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
206adacf 4513 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4514 } else {
/* MTE inactive: only clean the address via clean_data_tbi(). */
4515 addr = clean_data_tbi(s, addr);
206adacf 4516 }
9473d0ec 4517
/* zt shares the descriptor data field with the (shifted) MTE bits. */
206adacf 4518 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 4519 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4520 t_pg = tcg_temp_new_ptr();
4521
4522 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4523 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4524
4525 tcg_temp_free_ptr(t_pg);
500d0484 4526 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4527}
4528
/*
 * Emit a predicated contiguous load (LD1..LD4) of register(s) starting
 * at zt, governed by predicate pg, from address addr.
 *
 *   dtype - index into dtype_mop / the helper table
 *   nreg  - the encoded register-count field, 0..3, which selects the
 *           ld1..ld4 helpers; the true number of registers is nreg + 1
 *
 * The helper table is indexed by [mte active][big-endian][dtype][nreg].
 */
4529static void do_ld_zpa(DisasContext *s, int zt, int pg,
4530 TCGv_i64 addr, int dtype, int nreg)
4531{
206adacf
RH
4532 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4533 { /* mte inactive, little-endian */
4534 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4535 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4536 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4537 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4538 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4539
4540 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4541 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4542 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4543 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4544 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4545
4546 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4547 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4548 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4549 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4550 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4551
4552 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4553 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4554 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4555 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4556 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4557
4558 /* mte inactive, big-endian */
4559 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4560 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4561 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4562 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4564
4565 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4566 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4567 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4568 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4569 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4570
4571 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4572 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4573 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4574 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4575 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4576
4577 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4578 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4579 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4580 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4581 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4582
4583 { /* mte active, little-endian */
4584 { { gen_helper_sve_ld1bb_r_mte,
4585 gen_helper_sve_ld2bb_r_mte,
4586 gen_helper_sve_ld3bb_r_mte,
4587 gen_helper_sve_ld4bb_r_mte },
4588 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4589 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4590 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4591
4592 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4593 { gen_helper_sve_ld1hh_le_r_mte,
4594 gen_helper_sve_ld2hh_le_r_mte,
4595 gen_helper_sve_ld3hh_le_r_mte,
4596 gen_helper_sve_ld4hh_le_r_mte },
4597 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4598 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4599
4600 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4601 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4602 { gen_helper_sve_ld1ss_le_r_mte,
4603 gen_helper_sve_ld2ss_le_r_mte,
4604 gen_helper_sve_ld3ss_le_r_mte,
4605 gen_helper_sve_ld4ss_le_r_mte },
4606 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4607
4608 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4609 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4610 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4611 { gen_helper_sve_ld1dd_le_r_mte,
4612 gen_helper_sve_ld2dd_le_r_mte,
4613 gen_helper_sve_ld3dd_le_r_mte,
4614 gen_helper_sve_ld4dd_le_r_mte } },
4615
4616 /* mte active, big-endian */
4617 { { gen_helper_sve_ld1bb_r_mte,
4618 gen_helper_sve_ld2bb_r_mte,
4619 gen_helper_sve_ld3bb_r_mte,
4620 gen_helper_sve_ld4bb_r_mte },
4621 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4622 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4623 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4624
4625 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4626 { gen_helper_sve_ld1hh_be_r_mte,
4627 gen_helper_sve_ld2hh_be_r_mte,
4628 gen_helper_sve_ld3hh_be_r_mte,
4629 gen_helper_sve_ld4hh_be_r_mte },
4630 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4631 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4632
4633 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4634 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4635 { gen_helper_sve_ld1ss_be_r_mte,
4636 gen_helper_sve_ld2ss_be_r_mte,
4637 gen_helper_sve_ld3ss_be_r_mte,
4638 gen_helper_sve_ld4ss_be_r_mte },
4639 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4640
4641 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4642 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4643 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4644 { gen_helper_sve_ld1dd_be_r_mte,
4645 gen_helper_sve_ld2dd_be_r_mte,
4646 gen_helper_sve_ld3dd_be_r_mte,
4647 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4648 };
206adacf
RH
4649 gen_helper_gvec_mem *fn
4650 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4651
206adacf
RH
4652 /*
4653 * While there are holes in the table, they are not
c4e7c493
RH
4654 * accessible via the instruction encoding.
4655 */
4656 assert(fn != NULL);
/*
 * nreg is the 0..3 table index, but do_mem_zpa() needs the true
 * register count (1..4) to size the MTE check: it computes
 * SIZEM1 = (mte_n << msz) - 1, which would be -1 for LD1 and one
 * register short for LD2..LD4 if the raw field were passed.
 */
206adacf 4657 do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
c4e7c493
RH
4658}
4659
/*
 * Translator entry for the scalar-plus-scalar contiguous load.
 * Returns false (pattern not matched) when rm is 31.  Otherwise, when
 * sve_access_check() passes, the address is formed as
 * (Xm << msz) + Xn|SP, with msz taken from dtype, and the load is
 * emitted via do_ld_zpa().  Returns true in that case whether or not
 * the access check passed.
 */
3a7be554 4660static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4661{
4662 if (a->rm == 31) {
4663 return false;
4664 }
4665 if (sve_access_check(s)) {
4666 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4667 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4668 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4669 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4670 }
4671 return true;
4672}
4673
/*
 * Translator entry for the scalar-plus-immediate contiguous load.
 * When sve_access_check() passes, the address is Xn|SP plus
 * imm * elements * (nreg + 1), scaled by the memory element size,
 * where elements is the vector size divided by the element size of
 * dtype.  The load is emitted via do_ld_zpa().  Always returns true.
 */
3a7be554 4674static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4675{
4676 if (sve_access_check(s)) {
4677 int vsz = vec_full_reg_size(s);
4678 int elements = vsz >> dtype_esz[a->dtype];
4679 TCGv_i64 addr = new_tmp_a64(s);
4680
4681 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4682 (a->imm * elements * (a->nreg + 1))
4683 << dtype_msz(a->dtype));
4684 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4685 }
4686 return true;
4687}
e2654d75 4688
/*
 * Translator entry for LDFF1 with a scalar-plus-scalar address.
 * The helper is selected from a table indexed by
 * [mte active][big-endian][dtype].  When sve_access_check() passes,
 * the address is formed as (Xm << msz) + Xn|SP and the access is
 * emitted via do_mem_zpa() with a register count of 1.
 * Always returns true.
 */
3a7be554 4689static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4690{
aa13f7c3
RH
4691 static gen_helper_gvec_mem * const fns[2][2][16] = {
4692 { /* mte inactive, little-endian */
4693 { gen_helper_sve_ldff1bb_r,
4694 gen_helper_sve_ldff1bhu_r,
4695 gen_helper_sve_ldff1bsu_r,
4696 gen_helper_sve_ldff1bdu_r,
4697
4698 gen_helper_sve_ldff1sds_le_r,
4699 gen_helper_sve_ldff1hh_le_r,
4700 gen_helper_sve_ldff1hsu_le_r,
4701 gen_helper_sve_ldff1hdu_le_r,
4702
4703 gen_helper_sve_ldff1hds_le_r,
4704 gen_helper_sve_ldff1hss_le_r,
4705 gen_helper_sve_ldff1ss_le_r,
4706 gen_helper_sve_ldff1sdu_le_r,
4707
4708 gen_helper_sve_ldff1bds_r,
4709 gen_helper_sve_ldff1bss_r,
4710 gen_helper_sve_ldff1bhs_r,
4711 gen_helper_sve_ldff1dd_le_r },
4712
4713 /* mte inactive, big-endian */
4714 { gen_helper_sve_ldff1bb_r,
4715 gen_helper_sve_ldff1bhu_r,
4716 gen_helper_sve_ldff1bsu_r,
4717 gen_helper_sve_ldff1bdu_r,
4718
4719 gen_helper_sve_ldff1sds_be_r,
4720 gen_helper_sve_ldff1hh_be_r,
4721 gen_helper_sve_ldff1hsu_be_r,
4722 gen_helper_sve_ldff1hdu_be_r,
4723
4724 gen_helper_sve_ldff1hds_be_r,
4725 gen_helper_sve_ldff1hss_be_r,
4726 gen_helper_sve_ldff1ss_be_r,
4727 gen_helper_sve_ldff1sdu_be_r,
4728
4729 gen_helper_sve_ldff1bds_r,
4730 gen_helper_sve_ldff1bss_r,
4731 gen_helper_sve_ldff1bhs_r,
4732 gen_helper_sve_ldff1dd_be_r } },
4733
4734 { /* mte active, little-endian */
4735 { gen_helper_sve_ldff1bb_r_mte,
4736 gen_helper_sve_ldff1bhu_r_mte,
4737 gen_helper_sve_ldff1bsu_r_mte,
4738 gen_helper_sve_ldff1bdu_r_mte,
4739
4740 gen_helper_sve_ldff1sds_le_r_mte,
4741 gen_helper_sve_ldff1hh_le_r_mte,
4742 gen_helper_sve_ldff1hsu_le_r_mte,
4743 gen_helper_sve_ldff1hdu_le_r_mte,
4744
4745 gen_helper_sve_ldff1hds_le_r_mte,
4746 gen_helper_sve_ldff1hss_le_r_mte,
4747 gen_helper_sve_ldff1ss_le_r_mte,
4748 gen_helper_sve_ldff1sdu_le_r_mte,
4749
4750 gen_helper_sve_ldff1bds_r_mte,
4751 gen_helper_sve_ldff1bss_r_mte,
4752 gen_helper_sve_ldff1bhs_r_mte,
4753 gen_helper_sve_ldff1dd_le_r_mte },
4754
4755 /* mte active, big-endian */
4756 { gen_helper_sve_ldff1bb_r_mte,
4757 gen_helper_sve_ldff1bhu_r_mte,
4758 gen_helper_sve_ldff1bsu_r_mte,
4759 gen_helper_sve_ldff1bdu_r_mte,
4760
4761 gen_helper_sve_ldff1sds_be_r_mte,
4762 gen_helper_sve_ldff1hh_be_r_mte,
4763 gen_helper_sve_ldff1hsu_be_r_mte,
4764 gen_helper_sve_ldff1hdu_be_r_mte,
4765
4766 gen_helper_sve_ldff1hds_be_r_mte,
4767 gen_helper_sve_ldff1hss_be_r_mte,
4768 gen_helper_sve_ldff1ss_be_r_mte,
4769 gen_helper_sve_ldff1sdu_be_r_mte,
4770
4771 gen_helper_sve_ldff1bds_r_mte,
4772 gen_helper_sve_ldff1bss_r_mte,
4773 gen_helper_sve_ldff1bhs_r_mte,
4774 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4775 };
4776
4777 if (sve_access_check(s)) {
/* Unlike trans_LD_zprr, rm == 31 is not rejected here. */
4778 TCGv_i64 addr = new_tmp_a64(s);
4779 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4780 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4781 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4782 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4783 }
4784 return true;
4785}
4786
/*
 * Translator entry for LDNF1 with a scalar-plus-immediate address.
 * The helper is selected from a table indexed by
 * [mte active][big-endian][dtype].  When sve_access_check() passes,
 * the address is Xn|SP plus imm * elements scaled by the memory
 * element size, and the access is emitted via do_mem_zpa() with a
 * register count of 1.  Always returns true.
 */
3a7be554 4787static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4788{
aa13f7c3
RH
4789 static gen_helper_gvec_mem * const fns[2][2][16] = {
4790 { /* mte inactive, little-endian */
4791 { gen_helper_sve_ldnf1bb_r,
4792 gen_helper_sve_ldnf1bhu_r,
4793 gen_helper_sve_ldnf1bsu_r,
4794 gen_helper_sve_ldnf1bdu_r,
4795
4796 gen_helper_sve_ldnf1sds_le_r,
4797 gen_helper_sve_ldnf1hh_le_r,
4798 gen_helper_sve_ldnf1hsu_le_r,
4799 gen_helper_sve_ldnf1hdu_le_r,
4800
4801 gen_helper_sve_ldnf1hds_le_r,
4802 gen_helper_sve_ldnf1hss_le_r,
4803 gen_helper_sve_ldnf1ss_le_r,
4804 gen_helper_sve_ldnf1sdu_le_r,
4805
4806 gen_helper_sve_ldnf1bds_r,
4807 gen_helper_sve_ldnf1bss_r,
4808 gen_helper_sve_ldnf1bhs_r,
4809 gen_helper_sve_ldnf1dd_le_r },
4810
4811 /* mte inactive, big-endian */
4812 { gen_helper_sve_ldnf1bb_r,
4813 gen_helper_sve_ldnf1bhu_r,
4814 gen_helper_sve_ldnf1bsu_r,
4815 gen_helper_sve_ldnf1bdu_r,
4816
4817 gen_helper_sve_ldnf1sds_be_r,
4818 gen_helper_sve_ldnf1hh_be_r,
4819 gen_helper_sve_ldnf1hsu_be_r,
4820 gen_helper_sve_ldnf1hdu_be_r,
4821
4822 gen_helper_sve_ldnf1hds_be_r,
4823 gen_helper_sve_ldnf1hss_be_r,
4824 gen_helper_sve_ldnf1ss_be_r,
4825 gen_helper_sve_ldnf1sdu_be_r,
4826
4827 gen_helper_sve_ldnf1bds_r,
4828 gen_helper_sve_ldnf1bss_r,
4829 gen_helper_sve_ldnf1bhs_r,
4830 gen_helper_sve_ldnf1dd_be_r } },
4831
4832 { /* mte active, little-endian */
4833 { gen_helper_sve_ldnf1bb_r_mte,
4834 gen_helper_sve_ldnf1bhu_r_mte,
4835 gen_helper_sve_ldnf1bsu_r_mte,
4836 gen_helper_sve_ldnf1bdu_r_mte,
4837
4838 gen_helper_sve_ldnf1sds_le_r_mte,
4839 gen_helper_sve_ldnf1hh_le_r_mte,
4840 gen_helper_sve_ldnf1hsu_le_r_mte,
4841 gen_helper_sve_ldnf1hdu_le_r_mte,
4842
4843 gen_helper_sve_ldnf1hds_le_r_mte,
4844 gen_helper_sve_ldnf1hss_le_r_mte,
4845 gen_helper_sve_ldnf1ss_le_r_mte,
4846 gen_helper_sve_ldnf1sdu_le_r_mte,
4847
4848 gen_helper_sve_ldnf1bds_r_mte,
4849 gen_helper_sve_ldnf1bss_r_mte,
4850 gen_helper_sve_ldnf1bhs_r_mte,
4851 gen_helper_sve_ldnf1dd_le_r_mte },
4852
4853 /* mte active, big-endian */
4854 { gen_helper_sve_ldnf1bb_r_mte,
4855 gen_helper_sve_ldnf1bhu_r_mte,
4856 gen_helper_sve_ldnf1bsu_r_mte,
4857 gen_helper_sve_ldnf1bdu_r_mte,
4858
4859 gen_helper_sve_ldnf1sds_be_r_mte,
4860 gen_helper_sve_ldnf1hh_be_r_mte,
4861 gen_helper_sve_ldnf1hsu_be_r_mte,
4862 gen_helper_sve_ldnf1hdu_be_r_mte,
4863
4864 gen_helper_sve_ldnf1hds_be_r_mte,
4865 gen_helper_sve_ldnf1hss_be_r_mte,
4866 gen_helper_sve_ldnf1ss_be_r_mte,
4867 gen_helper_sve_ldnf1sdu_be_r_mte,
4868
4869 gen_helper_sve_ldnf1bds_r_mte,
4870 gen_helper_sve_ldnf1bss_r_mte,
4871 gen_helper_sve_ldnf1bhs_r_mte,
4872 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4873 };
4874
4875 if (sve_access_check(s)) {
/* off: imm counted in whole vectors of elements, converted to bytes. */
4876 int vsz = vec_full_reg_size(s);
4877 int elements = vsz >> dtype_esz[a->dtype];
4878 int off = (a->imm * elements) << dtype_msz(a->dtype);
4879 TCGv_i64 addr = new_tmp_a64(s);
4880
4881 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4882 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4883 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4884 }
4885 return true;
4886}
1a039c7e 4887
05abe304
RH
/*
 * Emit a load of one quadword (16 bytes) into register zt under
 * predicate pg from addr, then replicate it across the rest of the
 * vector.  msz selects the helper by memory element size; the table
 * is indexed by [big-endian][msz].
 *
 * NOTE(review): only the non-MTE helpers appear in the table and
 * s->mte_active is not consulted here, unlike do_mem_zpa(); confirm
 * whether MTE checking is intentionally omitted for this path.
 */
4888static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4889{
7d0a57a2
RH
4890 static gen_helper_gvec_mem * const fns[2][4] = {
4891 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4892 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4893 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4894 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
4895 };
4896 unsigned vsz = vec_full_reg_size(s);
4897 TCGv_ptr t_pg;
500d0484
RH
4898 TCGv_i32 t_desc;
4899 int desc, poff;
05abe304
RH
4900
4901 /* Load the first quadword using the normal predicated load helpers. */
/* The descriptor describes a 16-byte operation regardless of vsz. */
ba080b86 4902 desc = simd_desc(16, 16, zt);
500d0484 4903 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
4904
4905 poff = pred_full_reg_offset(s, pg);
4906 if (vsz > 16) {
4907 /*
4908 * Zero-extend the first 16 bits of the predicate into a temporary.
4909 * This avoids triggering an assert making sure we don't have bits
4910 * set within a predicate beyond VQ, but we have lowered VQ to 1
4911 * for this load operation.
4912 */
4913 TCGv_i64 tmp = tcg_temp_new_i64();
/*
 * On a big-endian host the low 16 bits of the first 64-bit predicate
 * word sit 6 bytes into that word.
 */
4914#ifdef HOST_WORDS_BIGENDIAN
4915 poff += 6;
4916#endif
4917 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4918
/* From here on the helper reads the predicate from vfp.preg_tmp. */
4919 poff = offsetof(CPUARMState, vfp.preg_tmp);
4920 tcg_gen_st_i64(tmp, cpu_env, poff);
4921 tcg_temp_free_i64(tmp);
4922 }
4923
05abe304 4924 t_pg = tcg_temp_new_ptr();
2a99ab2b 4925 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4926
500d0484 4927 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
4928
4929 tcg_temp_free_ptr(t_pg);
500d0484 4930 tcg_temp_free_i32(t_desc);
05abe304
RH
4931
4932 /* Replicate that first quadword. */
4933 if (vsz > 16) {
/* Copies from the register base (dofs) into the bytes from dofs + 16 on. */
4934 unsigned dofs = vec_full_reg_offset(s, zt);
4935 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4936 }
4937}
4938
/*
 * Translator entry for LD1RQ with a scalar-plus-scalar address.
 * Returns false (pattern not matched) when rm is 31.  Otherwise, when
 * sve_access_check() passes, the address is (Xm << msz) + Xn|SP and
 * the load is emitted via do_ldrq().  Returns true in that case
 * whether or not the access check passed.
 */
3a7be554 4939static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4940{
4941 if (a->rm == 31) {
4942 return false;
4943 }
4944 if (sve_access_check(s)) {
4945 int msz = dtype_msz(a->dtype);
4946 TCGv_i64 addr = new_tmp_a64(s);
4947 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4948 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4949 do_ldrq(s, a->rd, a->pg, addr, msz);
4950 }
4951 return true;
4952}
4953
/*
 * Translator entry for LD1RQ with a scalar-plus-immediate address.
 * When sve_access_check() passes, the address is Xn|SP + imm * 16
 * (the immediate counts 16-byte quadwords) and the load is emitted
 * via do_ldrq().  Always returns true.
 */
3a7be554 4954static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4955{
4956 if (sve_access_check(s)) {
4957 TCGv_i64 addr = new_tmp_a64(s);
4958 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4959 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4960 }
4961 return true;
4962}
4963
68459864 4964/* Load and broadcast element. */
/*
 * Returns true without emitting anything when sve_access_check() fails.
 * Otherwise emits: a test that branches past the load when the
 * governing predicate has no active element; a single load from
 * Xn|SP + (imm << msz); a broadcast of the loaded value to every
 * element of rd; and finally do_movz_zpz(), whose result is returned.
 */
3a7be554 4965static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 4966{
68459864
RH
4967 unsigned vsz = vec_full_reg_size(s);
4968 unsigned psz = pred_full_reg_size(s);
4969 unsigned esz = dtype_esz[a->dtype];
d0e372b0 4970 unsigned msz = dtype_msz(a->dtype);
c0ed9166 4971 TCGLabel *over;
4ac430e1 4972 TCGv_i64 temp, clean_addr;
68459864 4973
c0ed9166
RH
4974 if (!sve_access_check(s)) {
4975 return true;
4976 }
4977
4978 over = gen_new_label();
4979
68459864
RH
4980 /* If the guarding predicate has no bits set, no load occurs. */
4981 if (psz <= 8) {
4982 /* Reduce the pred_esz_masks value simply to reduce the
4983 * size of the code generated here.
4984 */
/* The whole predicate fits in one 64-bit word: test it inline. */
4985 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4986 temp = tcg_temp_new_i64();
4987 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4988 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4989 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4990 tcg_temp_free_i64(temp);
4991 } else {
/* Larger predicate: skip the load when find_last_active yields < 0. */
4992 TCGv_i32 t32 = tcg_temp_new_i32();
4993 find_last_active(s, t32, esz, a->pg);
4994 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4995 tcg_temp_free_i32(t32);
4996 }
4997
4998 /* Load the data. */
/* temp first holds the address, then is reused for the loaded value. */
4999 temp = tcg_temp_new_i64();
d0e372b0 5000 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5001 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5002
5003 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
68459864
RH
5004 s->be_data | dtype_mop[a->dtype]);
5005
5006 /* Broadcast to *all* elements. */
5007 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5008 vsz, vsz, temp);
5009 tcg_temp_free_i64(temp);
5010
5011 /* Zero the inactive elements. */
/* Both the load path and the skip path continue from this label. */
5012 gen_set_label(over);
60245996 5013 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5014}
5015
1a039c7e
RH
/*
 * Emit a predicated contiguous store (ST1..ST4) of register(s) starting
 * at zt, governed by predicate pg, to address addr.
 *
 *   msz  - log2 of the memory element size
 *   esz  - log2 of the vector element size
 *   nreg - the encoded register-count field, 0..3; the true number of
 *          registers stored is nreg + 1
 *
 * fn_single is indexed by [mte active][big-endian][msz][esz] and is
 * used for ST1; fn_multiple is indexed by
 * [mte active][big-endian][nreg - 1][msz] and is used for ST2..ST4.
 */
5016static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5017 int msz, int esz, int nreg)
5018{
71b9f394
RH
5019 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5020 { { { gen_helper_sve_st1bb_r,
5021 gen_helper_sve_st1bh_r,
5022 gen_helper_sve_st1bs_r,
5023 gen_helper_sve_st1bd_r },
5024 { NULL,
5025 gen_helper_sve_st1hh_le_r,
5026 gen_helper_sve_st1hs_le_r,
5027 gen_helper_sve_st1hd_le_r },
5028 { NULL, NULL,
5029 gen_helper_sve_st1ss_le_r,
5030 gen_helper_sve_st1sd_le_r },
5031 { NULL, NULL, NULL,
5032 gen_helper_sve_st1dd_le_r } },
5033 { { gen_helper_sve_st1bb_r,
5034 gen_helper_sve_st1bh_r,
5035 gen_helper_sve_st1bs_r,
5036 gen_helper_sve_st1bd_r },
5037 { NULL,
5038 gen_helper_sve_st1hh_be_r,
5039 gen_helper_sve_st1hs_be_r,
5040 gen_helper_sve_st1hd_be_r },
5041 { NULL, NULL,
5042 gen_helper_sve_st1ss_be_r,
5043 gen_helper_sve_st1sd_be_r },
5044 { NULL, NULL, NULL,
5045 gen_helper_sve_st1dd_be_r } } },
5046
5047 { { { gen_helper_sve_st1bb_r_mte,
5048 gen_helper_sve_st1bh_r_mte,
5049 gen_helper_sve_st1bs_r_mte,
5050 gen_helper_sve_st1bd_r_mte },
5051 { NULL,
5052 gen_helper_sve_st1hh_le_r_mte,
5053 gen_helper_sve_st1hs_le_r_mte,
5054 gen_helper_sve_st1hd_le_r_mte },
5055 { NULL, NULL,
5056 gen_helper_sve_st1ss_le_r_mte,
5057 gen_helper_sve_st1sd_le_r_mte },
5058 { NULL, NULL, NULL,
5059 gen_helper_sve_st1dd_le_r_mte } },
5060 { { gen_helper_sve_st1bb_r_mte,
5061 gen_helper_sve_st1bh_r_mte,
5062 gen_helper_sve_st1bs_r_mte,
5063 gen_helper_sve_st1bd_r_mte },
5064 { NULL,
5065 gen_helper_sve_st1hh_be_r_mte,
5066 gen_helper_sve_st1hs_be_r_mte,
5067 gen_helper_sve_st1hd_be_r_mte },
5068 { NULL, NULL,
5069 gen_helper_sve_st1ss_be_r_mte,
5070 gen_helper_sve_st1sd_be_r_mte },
5071 { NULL, NULL, NULL,
5072 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5073 };
71b9f394
RH
5074 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5075 { { { gen_helper_sve_st2bb_r,
5076 gen_helper_sve_st2hh_le_r,
5077 gen_helper_sve_st2ss_le_r,
5078 gen_helper_sve_st2dd_le_r },
5079 { gen_helper_sve_st3bb_r,
5080 gen_helper_sve_st3hh_le_r,
5081 gen_helper_sve_st3ss_le_r,
5082 gen_helper_sve_st3dd_le_r },
5083 { gen_helper_sve_st4bb_r,
5084 gen_helper_sve_st4hh_le_r,
5085 gen_helper_sve_st4ss_le_r,
5086 gen_helper_sve_st4dd_le_r } },
5087 { { gen_helper_sve_st2bb_r,
5088 gen_helper_sve_st2hh_be_r,
5089 gen_helper_sve_st2ss_be_r,
5090 gen_helper_sve_st2dd_be_r },
5091 { gen_helper_sve_st3bb_r,
5092 gen_helper_sve_st3hh_be_r,
5093 gen_helper_sve_st3ss_be_r,
5094 gen_helper_sve_st3dd_be_r },
5095 { gen_helper_sve_st4bb_r,
5096 gen_helper_sve_st4hh_be_r,
5097 gen_helper_sve_st4ss_be_r,
5098 gen_helper_sve_st4dd_be_r } } },
5099 { { { gen_helper_sve_st2bb_r_mte,
5100 gen_helper_sve_st2hh_le_r_mte,
5101 gen_helper_sve_st2ss_le_r_mte,
5102 gen_helper_sve_st2dd_le_r_mte },
5103 { gen_helper_sve_st3bb_r_mte,
5104 gen_helper_sve_st3hh_le_r_mte,
5105 gen_helper_sve_st3ss_le_r_mte,
5106 gen_helper_sve_st3dd_le_r_mte },
5107 { gen_helper_sve_st4bb_r_mte,
5108 gen_helper_sve_st4hh_le_r_mte,
5109 gen_helper_sve_st4ss_le_r_mte,
5110 gen_helper_sve_st4dd_le_r_mte } },
5111 { { gen_helper_sve_st2bb_r_mte,
5112 gen_helper_sve_st2hh_be_r_mte,
5113 gen_helper_sve_st2ss_be_r_mte,
5114 gen_helper_sve_st2dd_be_r_mte },
5115 { gen_helper_sve_st3bb_r_mte,
5116 gen_helper_sve_st3hh_be_r_mte,
5117 gen_helper_sve_st3ss_be_r_mte,
5118 gen_helper_sve_st3dd_be_r_mte },
5119 { gen_helper_sve_st4bb_r_mte,
5120 gen_helper_sve_st4hh_be_r_mte,
5121 gen_helper_sve_st4ss_be_r_mte,
5122 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5123 };
5124 gen_helper_gvec_mem *fn;
28d57f2d 5125 int be = s->be_data == MO_BE;
1a039c7e
RH
5126
5127 if (nreg == 0) {
5128 /* ST1 */
71b9f394
RH
5129 fn = fn_single[s->mte_active[0]][be][msz][esz];
1a039c7e
RH
5131 } else {
5132 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5133 assert(msz == esz);
71b9f394 5134 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5135 }
5136 assert(fn != NULL);
/*
 * nreg is the encoded 0..3 field, but do_mem_zpa() needs the true
 * register count (1..4) to size the MTE check via
 * SIZEM1 = (mte_n << msz) - 1.  Passing the raw field for ST2..ST4
 * would make the checked size one register too small.
 */
71b9f394 5137 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn);
1a039c7e
RH
5138}
5139
/*
 * Translator entry for the scalar-plus-scalar contiguous store.
 * Returns false (pattern not matched) when rm is 31 or when the memory
 * element size exceeds the vector element size.  Otherwise, when
 * sve_access_check() passes, the address is (Xm << msz) + Xn|SP and
 * the store is emitted via do_st_zpa().  Returns true in that case
 * whether or not the access check passed.
 */
3a7be554 5140static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5141{
5142 if (a->rm == 31 || a->msz > a->esz) {
5143 return false;
5144 }
5145 if (sve_access_check(s)) {
5146 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5147 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5148 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5149 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5150 }
5151 return true;
5152}
5153
/*
 * Translator entry for the scalar-plus-immediate contiguous store.
 * Returns false (pattern not matched) when the memory element size
 * exceeds the vector element size.  Otherwise, when sve_access_check()
 * passes, the address is Xn|SP plus imm * elements * (nreg + 1) scaled
 * by the memory element size, where elements is the vector size
 * divided by the vector element size.  The store is emitted via
 * do_st_zpa().  Returns true in that case whether or not the access
 * check passed.
 */
3a7be554 5154static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5155{
5156 if (a->msz > a->esz) {
5157 return false;
5158 }
5159 if (sve_access_check(s)) {
5160 int vsz = vec_full_reg_size(s);
5161 int elements = vsz >> a->esz;
5162 TCGv_i64 addr = new_tmp_a64(s);
5163
5164 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5165 (a->imm * elements * (a->nreg + 1)) << a->msz);
5166 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5167 }
5168 return true;
5169}
f6dbf62a
RH
5170
5171/*
5172 *** SVE gather loads / scatter stores
5173 */
5174
/*
 * Common tail for the gather-load / scatter-store translators: build
 * the descriptor and call the out-of-line helper FN.
 *
 *   zt       - data vector register number
 *   pg       - governing predicate register number
 *   zm       - offset vector register number
 *   scale    - value placed in the descriptor data field
 *   scalar   - 64-bit scalar operand passed through to the helper
 *   msz      - log2 of the memory element size, used for the MTE size
 *   is_write - recorded in the MTE descriptor WRITE field
 *   fn       - helper invoked as
 *              fn(cpu_env, zt pointer, pg pointer, zm pointer, scalar, desc)
 */
500d0484 5175static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5176 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5177 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5178{
5179 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5180 TCGv_ptr t_zm = tcg_temp_new_ptr();
5181 TCGv_ptr t_pg = tcg_temp_new_ptr();
5182 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5183 TCGv_i32 t_desc;
d28d12f0 5184 int desc = 0;
500d0484 5185
d28d12f0
RH
5186 if (s->mte_active[0]) {
/*
 * MTE active: each checked access is a single element, so SIZEM1 is
 * (1 << msz) - 1.  The MTE bits are shifted above the scale value.
 */
5187 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5188 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5189 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5190 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
28f32503 5191 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
d28d12f0
RH
5192 desc <<= SVE_MTEDESC_SHIFT;
5193 }
cdecb3fc 5194 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 5195 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5196
/* Pass the three registers to the helper as pointers into cpu_env. */
5197 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5198 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5199 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5200 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5201
5202 tcg_temp_free_ptr(t_zt);
5203 tcg_temp_free_ptr(t_zm);
5204 tcg_temp_free_ptr(t_pg);
500d0484 5205 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5206}
5207
d28d12f0
RH
5208/* Indexed by [mte][be][ff][xs][u][msz]. */
5209static gen_helper_gvec_mem_scatter * const
5210gather_load_fn32[2][2][2][2][2][3] = {
5211 { /* MTE Inactive */
5212 { /* Little-endian */
5213 { { { gen_helper_sve_ldbss_zsu,
5214 gen_helper_sve_ldhss_le_zsu,
5215 NULL, },
5216 { gen_helper_sve_ldbsu_zsu,
5217 gen_helper_sve_ldhsu_le_zsu,
5218 gen_helper_sve_ldss_le_zsu, } },
5219 { { gen_helper_sve_ldbss_zss,
5220 gen_helper_sve_ldhss_le_zss,
5221 NULL, },
5222 { gen_helper_sve_ldbsu_zss,
5223 gen_helper_sve_ldhsu_le_zss,
5224 gen_helper_sve_ldss_le_zss, } } },
5225
5226 /* First-fault */
5227 { { { gen_helper_sve_ldffbss_zsu,
5228 gen_helper_sve_ldffhss_le_zsu,
5229 NULL, },
5230 { gen_helper_sve_ldffbsu_zsu,
5231 gen_helper_sve_ldffhsu_le_zsu,
5232 gen_helper_sve_ldffss_le_zsu, } },
5233 { { gen_helper_sve_ldffbss_zss,
5234 gen_helper_sve_ldffhss_le_zss,
5235 NULL, },
5236 { gen_helper_sve_ldffbsu_zss,
5237 gen_helper_sve_ldffhsu_le_zss,
5238 gen_helper_sve_ldffss_le_zss, } } } },
5239
5240 { /* Big-endian */
5241 { { { gen_helper_sve_ldbss_zsu,
5242 gen_helper_sve_ldhss_be_zsu,
5243 NULL, },
5244 { gen_helper_sve_ldbsu_zsu,
5245 gen_helper_sve_ldhsu_be_zsu,
5246 gen_helper_sve_ldss_be_zsu, } },
5247 { { gen_helper_sve_ldbss_zss,
5248 gen_helper_sve_ldhss_be_zss,
5249 NULL, },
5250 { gen_helper_sve_ldbsu_zss,
5251 gen_helper_sve_ldhsu_be_zss,
5252 gen_helper_sve_ldss_be_zss, } } },
5253
5254 /* First-fault */
5255 { { { gen_helper_sve_ldffbss_zsu,
5256 gen_helper_sve_ldffhss_be_zsu,
5257 NULL, },
5258 { gen_helper_sve_ldffbsu_zsu,
5259 gen_helper_sve_ldffhsu_be_zsu,
5260 gen_helper_sve_ldffss_be_zsu, } },
5261 { { gen_helper_sve_ldffbss_zss,
5262 gen_helper_sve_ldffhss_be_zss,
5263 NULL, },
5264 { gen_helper_sve_ldffbsu_zss,
5265 gen_helper_sve_ldffhsu_be_zss,
5266 gen_helper_sve_ldffss_be_zss, } } } } },
5267 { /* MTE Active */
5268 { /* Little-endian */
5269 { { { gen_helper_sve_ldbss_zsu_mte,
5270 gen_helper_sve_ldhss_le_zsu_mte,
5271 NULL, },
5272 { gen_helper_sve_ldbsu_zsu_mte,
5273 gen_helper_sve_ldhsu_le_zsu_mte,
5274 gen_helper_sve_ldss_le_zsu_mte, } },
5275 { { gen_helper_sve_ldbss_zss_mte,
5276 gen_helper_sve_ldhss_le_zss_mte,
5277 NULL, },
5278 { gen_helper_sve_ldbsu_zss_mte,
5279 gen_helper_sve_ldhsu_le_zss_mte,
5280 gen_helper_sve_ldss_le_zss_mte, } } },
5281
5282 /* First-fault */
5283 { { { gen_helper_sve_ldffbss_zsu_mte,
5284 gen_helper_sve_ldffhss_le_zsu_mte,
5285 NULL, },
5286 { gen_helper_sve_ldffbsu_zsu_mte,
5287 gen_helper_sve_ldffhsu_le_zsu_mte,
5288 gen_helper_sve_ldffss_le_zsu_mte, } },
5289 { { gen_helper_sve_ldffbss_zss_mte,
5290 gen_helper_sve_ldffhss_le_zss_mte,
5291 NULL, },
5292 { gen_helper_sve_ldffbsu_zss_mte,
5293 gen_helper_sve_ldffhsu_le_zss_mte,
5294 gen_helper_sve_ldffss_le_zss_mte, } } } },
5295
5296 { /* Big-endian */
5297 { { { gen_helper_sve_ldbss_zsu_mte,
5298 gen_helper_sve_ldhss_be_zsu_mte,
5299 NULL, },
5300 { gen_helper_sve_ldbsu_zsu_mte,
5301 gen_helper_sve_ldhsu_be_zsu_mte,
5302 gen_helper_sve_ldss_be_zsu_mte, } },
5303 { { gen_helper_sve_ldbss_zss_mte,
5304 gen_helper_sve_ldhss_be_zss_mte,
5305 NULL, },
5306 { gen_helper_sve_ldbsu_zss_mte,
5307 gen_helper_sve_ldhsu_be_zss_mte,
5308 gen_helper_sve_ldss_be_zss_mte, } } },
5309
5310 /* First-fault */
5311 { { { gen_helper_sve_ldffbss_zsu_mte,
5312 gen_helper_sve_ldffhss_be_zsu_mte,
5313 NULL, },
5314 { gen_helper_sve_ldffbsu_zsu_mte,
5315 gen_helper_sve_ldffhsu_be_zsu_mte,
5316 gen_helper_sve_ldffss_be_zsu_mte, } },
5317 { { gen_helper_sve_ldffbss_zss_mte,
5318 gen_helper_sve_ldffhss_be_zss_mte,
5319 NULL, },
5320 { gen_helper_sve_ldffbsu_zss_mte,
5321 gen_helper_sve_ldffhsu_be_zss_mte,
5322 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5323};
5324
5325/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5326static gen_helper_gvec_mem_scatter * const
5327gather_load_fn64[2][2][2][3][2][4] = {
5328 { /* MTE Inactive */
5329 { /* Little-endian */
5330 { { { gen_helper_sve_ldbds_zsu,
5331 gen_helper_sve_ldhds_le_zsu,
5332 gen_helper_sve_ldsds_le_zsu,
5333 NULL, },
5334 { gen_helper_sve_ldbdu_zsu,
5335 gen_helper_sve_ldhdu_le_zsu,
5336 gen_helper_sve_ldsdu_le_zsu,
5337 gen_helper_sve_lddd_le_zsu, } },
5338 { { gen_helper_sve_ldbds_zss,
5339 gen_helper_sve_ldhds_le_zss,
5340 gen_helper_sve_ldsds_le_zss,
5341 NULL, },
5342 { gen_helper_sve_ldbdu_zss,
5343 gen_helper_sve_ldhdu_le_zss,
5344 gen_helper_sve_ldsdu_le_zss,
5345 gen_helper_sve_lddd_le_zss, } },
5346 { { gen_helper_sve_ldbds_zd,
5347 gen_helper_sve_ldhds_le_zd,
5348 gen_helper_sve_ldsds_le_zd,
5349 NULL, },
5350 { gen_helper_sve_ldbdu_zd,
5351 gen_helper_sve_ldhdu_le_zd,
5352 gen_helper_sve_ldsdu_le_zd,
5353 gen_helper_sve_lddd_le_zd, } } },
5354
5355 /* First-fault */
5356 { { { gen_helper_sve_ldffbds_zsu,
5357 gen_helper_sve_ldffhds_le_zsu,
5358 gen_helper_sve_ldffsds_le_zsu,
5359 NULL, },
5360 { gen_helper_sve_ldffbdu_zsu,
5361 gen_helper_sve_ldffhdu_le_zsu,
5362 gen_helper_sve_ldffsdu_le_zsu,
5363 gen_helper_sve_ldffdd_le_zsu, } },
5364 { { gen_helper_sve_ldffbds_zss,
5365 gen_helper_sve_ldffhds_le_zss,
5366 gen_helper_sve_ldffsds_le_zss,
5367 NULL, },
5368 { gen_helper_sve_ldffbdu_zss,
5369 gen_helper_sve_ldffhdu_le_zss,
5370 gen_helper_sve_ldffsdu_le_zss,
5371 gen_helper_sve_ldffdd_le_zss, } },
5372 { { gen_helper_sve_ldffbds_zd,
5373 gen_helper_sve_ldffhds_le_zd,
5374 gen_helper_sve_ldffsds_le_zd,
5375 NULL, },
5376 { gen_helper_sve_ldffbdu_zd,
5377 gen_helper_sve_ldffhdu_le_zd,
5378 gen_helper_sve_ldffsdu_le_zd,
5379 gen_helper_sve_ldffdd_le_zd, } } } },
5380 { /* Big-endian */
5381 { { { gen_helper_sve_ldbds_zsu,
5382 gen_helper_sve_ldhds_be_zsu,
5383 gen_helper_sve_ldsds_be_zsu,
5384 NULL, },
5385 { gen_helper_sve_ldbdu_zsu,
5386 gen_helper_sve_ldhdu_be_zsu,
5387 gen_helper_sve_ldsdu_be_zsu,
5388 gen_helper_sve_lddd_be_zsu, } },
5389 { { gen_helper_sve_ldbds_zss,
5390 gen_helper_sve_ldhds_be_zss,
5391 gen_helper_sve_ldsds_be_zss,
5392 NULL, },
5393 { gen_helper_sve_ldbdu_zss,
5394 gen_helper_sve_ldhdu_be_zss,
5395 gen_helper_sve_ldsdu_be_zss,
5396 gen_helper_sve_lddd_be_zss, } },
5397 { { gen_helper_sve_ldbds_zd,
5398 gen_helper_sve_ldhds_be_zd,
5399 gen_helper_sve_ldsds_be_zd,
5400 NULL, },
5401 { gen_helper_sve_ldbdu_zd,
5402 gen_helper_sve_ldhdu_be_zd,
5403 gen_helper_sve_ldsdu_be_zd,
5404 gen_helper_sve_lddd_be_zd, } } },
5405
5406 /* First-fault */
5407 { { { gen_helper_sve_ldffbds_zsu,
5408 gen_helper_sve_ldffhds_be_zsu,
5409 gen_helper_sve_ldffsds_be_zsu,
5410 NULL, },
5411 { gen_helper_sve_ldffbdu_zsu,
5412 gen_helper_sve_ldffhdu_be_zsu,
5413 gen_helper_sve_ldffsdu_be_zsu,
5414 gen_helper_sve_ldffdd_be_zsu, } },
5415 { { gen_helper_sve_ldffbds_zss,
5416 gen_helper_sve_ldffhds_be_zss,
5417 gen_helper_sve_ldffsds_be_zss,
5418 NULL, },
5419 { gen_helper_sve_ldffbdu_zss,
5420 gen_helper_sve_ldffhdu_be_zss,
5421 gen_helper_sve_ldffsdu_be_zss,
5422 gen_helper_sve_ldffdd_be_zss, } },
5423 { { gen_helper_sve_ldffbds_zd,
5424 gen_helper_sve_ldffhds_be_zd,
5425 gen_helper_sve_ldffsds_be_zd,
5426 NULL, },
5427 { gen_helper_sve_ldffbdu_zd,
5428 gen_helper_sve_ldffhdu_be_zd,
5429 gen_helper_sve_ldffsdu_be_zd,
5430 gen_helper_sve_ldffdd_be_zd, } } } } },
5431 { /* MTE Active */
5432 { /* Little-endian */
5433 { { { gen_helper_sve_ldbds_zsu_mte,
5434 gen_helper_sve_ldhds_le_zsu_mte,
5435 gen_helper_sve_ldsds_le_zsu_mte,
5436 NULL, },
5437 { gen_helper_sve_ldbdu_zsu_mte,
5438 gen_helper_sve_ldhdu_le_zsu_mte,
5439 gen_helper_sve_ldsdu_le_zsu_mte,
5440 gen_helper_sve_lddd_le_zsu_mte, } },
5441 { { gen_helper_sve_ldbds_zss_mte,
5442 gen_helper_sve_ldhds_le_zss_mte,
5443 gen_helper_sve_ldsds_le_zss_mte,
5444 NULL, },
5445 { gen_helper_sve_ldbdu_zss_mte,
5446 gen_helper_sve_ldhdu_le_zss_mte,
5447 gen_helper_sve_ldsdu_le_zss_mte,
5448 gen_helper_sve_lddd_le_zss_mte, } },
5449 { { gen_helper_sve_ldbds_zd_mte,
5450 gen_helper_sve_ldhds_le_zd_mte,
5451 gen_helper_sve_ldsds_le_zd_mte,
5452 NULL, },
5453 { gen_helper_sve_ldbdu_zd_mte,
5454 gen_helper_sve_ldhdu_le_zd_mte,
5455 gen_helper_sve_ldsdu_le_zd_mte,
5456 gen_helper_sve_lddd_le_zd_mte, } } },
5457
5458 /* First-fault */
5459 { { { gen_helper_sve_ldffbds_zsu_mte,
5460 gen_helper_sve_ldffhds_le_zsu_mte,
5461 gen_helper_sve_ldffsds_le_zsu_mte,
5462 NULL, },
5463 { gen_helper_sve_ldffbdu_zsu_mte,
5464 gen_helper_sve_ldffhdu_le_zsu_mte,
5465 gen_helper_sve_ldffsdu_le_zsu_mte,
5466 gen_helper_sve_ldffdd_le_zsu_mte, } },
5467 { { gen_helper_sve_ldffbds_zss_mte,
5468 gen_helper_sve_ldffhds_le_zss_mte,
5469 gen_helper_sve_ldffsds_le_zss_mte,
5470 NULL, },
5471 { gen_helper_sve_ldffbdu_zss_mte,
5472 gen_helper_sve_ldffhdu_le_zss_mte,
5473 gen_helper_sve_ldffsdu_le_zss_mte,
5474 gen_helper_sve_ldffdd_le_zss_mte, } },
5475 { { gen_helper_sve_ldffbds_zd_mte,
5476 gen_helper_sve_ldffhds_le_zd_mte,
5477 gen_helper_sve_ldffsds_le_zd_mte,
5478 NULL, },
5479 { gen_helper_sve_ldffbdu_zd_mte,
5480 gen_helper_sve_ldffhdu_le_zd_mte,
5481 gen_helper_sve_ldffsdu_le_zd_mte,
5482 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5483 { /* Big-endian */
5484 { { { gen_helper_sve_ldbds_zsu_mte,
5485 gen_helper_sve_ldhds_be_zsu_mte,
5486 gen_helper_sve_ldsds_be_zsu_mte,
5487 NULL, },
5488 { gen_helper_sve_ldbdu_zsu_mte,
5489 gen_helper_sve_ldhdu_be_zsu_mte,
5490 gen_helper_sve_ldsdu_be_zsu_mte,
5491 gen_helper_sve_lddd_be_zsu_mte, } },
5492 { { gen_helper_sve_ldbds_zss_mte,
5493 gen_helper_sve_ldhds_be_zss_mte,
5494 gen_helper_sve_ldsds_be_zss_mte,
5495 NULL, },
5496 { gen_helper_sve_ldbdu_zss_mte,
5497 gen_helper_sve_ldhdu_be_zss_mte,
5498 gen_helper_sve_ldsdu_be_zss_mte,
5499 gen_helper_sve_lddd_be_zss_mte, } },
5500 { { gen_helper_sve_ldbds_zd_mte,
5501 gen_helper_sve_ldhds_be_zd_mte,
5502 gen_helper_sve_ldsds_be_zd_mte,
5503 NULL, },
5504 { gen_helper_sve_ldbdu_zd_mte,
5505 gen_helper_sve_ldhdu_be_zd_mte,
5506 gen_helper_sve_ldsdu_be_zd_mte,
5507 gen_helper_sve_lddd_be_zd_mte, } } },
5508
5509 /* First-fault */
5510 { { { gen_helper_sve_ldffbds_zsu_mte,
5511 gen_helper_sve_ldffhds_be_zsu_mte,
5512 gen_helper_sve_ldffsds_be_zsu_mte,
5513 NULL, },
5514 { gen_helper_sve_ldffbdu_zsu_mte,
5515 gen_helper_sve_ldffhdu_be_zsu_mte,
5516 gen_helper_sve_ldffsdu_be_zsu_mte,
5517 gen_helper_sve_ldffdd_be_zsu_mte, } },
5518 { { gen_helper_sve_ldffbds_zss_mte,
5519 gen_helper_sve_ldffhds_be_zss_mte,
5520 gen_helper_sve_ldffsds_be_zss_mte,
5521 NULL, },
5522 { gen_helper_sve_ldffbdu_zss_mte,
5523 gen_helper_sve_ldffhdu_be_zss_mte,
5524 gen_helper_sve_ldffsdu_be_zss_mte,
5525 gen_helper_sve_ldffdd_be_zss_mte, } },
5526 { { gen_helper_sve_ldffbds_zd_mte,
5527 gen_helper_sve_ldffhds_be_zd_mte,
5528 gen_helper_sve_ldffsds_be_zd_mte,
5529 NULL, },
5530 { gen_helper_sve_ldffbdu_zd_mte,
5531 gen_helper_sve_ldffhdu_be_zd_mte,
5532 gen_helper_sve_ldffsdu_be_zd_mte,
5533 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5534};
5535
3a7be554 5536static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5537{
5538 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5539 bool be = s->be_data == MO_BE;
5540 bool mte = s->mte_active[0];
673e9fa6
RH
5541
5542 if (!sve_access_check(s)) {
5543 return true;
5544 }
5545
5546 switch (a->esz) {
5547 case MO_32:
d28d12f0 5548 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5549 break;
5550 case MO_64:
d28d12f0 5551 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5552 break;
5553 }
5554 assert(fn != NULL);
5555
5556 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5557 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5558 return true;
5559}
5560
3a7be554 5561static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5562{
5563 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5564 bool be = s->be_data == MO_BE;
5565 bool mte = s->mte_active[0];
673e9fa6
RH
5566 TCGv_i64 imm;
5567
5568 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5569 return false;
5570 }
5571 if (!sve_access_check(s)) {
5572 return true;
5573 }
5574
5575 switch (a->esz) {
5576 case MO_32:
d28d12f0 5577 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5578 break;
5579 case MO_64:
d28d12f0 5580 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5581 break;
5582 }
5583 assert(fn != NULL);
5584
5585 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5586 * by loading the immediate into the scalar parameter.
5587 */
5588 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5589 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
5590 tcg_temp_free_i64(imm);
5591 return true;
5592}
5593
d28d12f0
RH
5594/* Indexed by [mte][be][xs][msz]. */
5595static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5596 { /* MTE Inactive */
5597 { /* Little-endian */
5598 { gen_helper_sve_stbs_zsu,
5599 gen_helper_sve_sths_le_zsu,
5600 gen_helper_sve_stss_le_zsu, },
5601 { gen_helper_sve_stbs_zss,
5602 gen_helper_sve_sths_le_zss,
5603 gen_helper_sve_stss_le_zss, } },
5604 { /* Big-endian */
5605 { gen_helper_sve_stbs_zsu,
5606 gen_helper_sve_sths_be_zsu,
5607 gen_helper_sve_stss_be_zsu, },
5608 { gen_helper_sve_stbs_zss,
5609 gen_helper_sve_sths_be_zss,
5610 gen_helper_sve_stss_be_zss, } } },
5611 { /* MTE Active */
5612 { /* Little-endian */
5613 { gen_helper_sve_stbs_zsu_mte,
5614 gen_helper_sve_sths_le_zsu_mte,
5615 gen_helper_sve_stss_le_zsu_mte, },
5616 { gen_helper_sve_stbs_zss_mte,
5617 gen_helper_sve_sths_le_zss_mte,
5618 gen_helper_sve_stss_le_zss_mte, } },
5619 { /* Big-endian */
5620 { gen_helper_sve_stbs_zsu_mte,
5621 gen_helper_sve_sths_be_zsu_mte,
5622 gen_helper_sve_stss_be_zsu_mte, },
5623 { gen_helper_sve_stbs_zss_mte,
5624 gen_helper_sve_sths_be_zss_mte,
5625 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5626};
5627
5628/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5629static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5630 { /* MTE Inactive */
5631 { /* Little-endian */
5632 { gen_helper_sve_stbd_zsu,
5633 gen_helper_sve_sthd_le_zsu,
5634 gen_helper_sve_stsd_le_zsu,
5635 gen_helper_sve_stdd_le_zsu, },
5636 { gen_helper_sve_stbd_zss,
5637 gen_helper_sve_sthd_le_zss,
5638 gen_helper_sve_stsd_le_zss,
5639 gen_helper_sve_stdd_le_zss, },
5640 { gen_helper_sve_stbd_zd,
5641 gen_helper_sve_sthd_le_zd,
5642 gen_helper_sve_stsd_le_zd,
5643 gen_helper_sve_stdd_le_zd, } },
5644 { /* Big-endian */
5645 { gen_helper_sve_stbd_zsu,
5646 gen_helper_sve_sthd_be_zsu,
5647 gen_helper_sve_stsd_be_zsu,
5648 gen_helper_sve_stdd_be_zsu, },
5649 { gen_helper_sve_stbd_zss,
5650 gen_helper_sve_sthd_be_zss,
5651 gen_helper_sve_stsd_be_zss,
5652 gen_helper_sve_stdd_be_zss, },
5653 { gen_helper_sve_stbd_zd,
5654 gen_helper_sve_sthd_be_zd,
5655 gen_helper_sve_stsd_be_zd,
5656 gen_helper_sve_stdd_be_zd, } } },
5657 { /* MTE Inactive */
5658 { /* Little-endian */
5659 { gen_helper_sve_stbd_zsu_mte,
5660 gen_helper_sve_sthd_le_zsu_mte,
5661 gen_helper_sve_stsd_le_zsu_mte,
5662 gen_helper_sve_stdd_le_zsu_mte, },
5663 { gen_helper_sve_stbd_zss_mte,
5664 gen_helper_sve_sthd_le_zss_mte,
5665 gen_helper_sve_stsd_le_zss_mte,
5666 gen_helper_sve_stdd_le_zss_mte, },
5667 { gen_helper_sve_stbd_zd_mte,
5668 gen_helper_sve_sthd_le_zd_mte,
5669 gen_helper_sve_stsd_le_zd_mte,
5670 gen_helper_sve_stdd_le_zd_mte, } },
5671 { /* Big-endian */
5672 { gen_helper_sve_stbd_zsu_mte,
5673 gen_helper_sve_sthd_be_zsu_mte,
5674 gen_helper_sve_stsd_be_zsu_mte,
5675 gen_helper_sve_stdd_be_zsu_mte, },
5676 { gen_helper_sve_stbd_zss_mte,
5677 gen_helper_sve_sthd_be_zss_mte,
5678 gen_helper_sve_stsd_be_zss_mte,
5679 gen_helper_sve_stdd_be_zss_mte, },
5680 { gen_helper_sve_stbd_zd_mte,
5681 gen_helper_sve_sthd_be_zd_mte,
5682 gen_helper_sve_stsd_be_zd_mte,
5683 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5684};
5685
3a7be554 5686static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5687{
f6dbf62a 5688 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5689 bool be = s->be_data == MO_BE;
5690 bool mte = s->mte_active[0];
f6dbf62a
RH
5691
5692 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5693 return false;
5694 }
5695 if (!sve_access_check(s)) {
5696 return true;
5697 }
5698 switch (a->esz) {
5699 case MO_32:
d28d12f0 5700 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5701 break;
5702 case MO_64:
d28d12f0 5703 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5704 break;
5705 default:
5706 g_assert_not_reached();
5707 }
5708 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5709 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5710 return true;
5711}
dec6cf6b 5712
3a7be554 5713static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5714{
5715 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5716 bool be = s->be_data == MO_BE;
5717 bool mte = s->mte_active[0];
408ecde9
RH
5718 TCGv_i64 imm;
5719
5720 if (a->esz < a->msz) {
5721 return false;
5722 }
5723 if (!sve_access_check(s)) {
5724 return true;
5725 }
5726
5727 switch (a->esz) {
5728 case MO_32:
d28d12f0 5729 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5730 break;
5731 case MO_64:
d28d12f0 5732 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5733 break;
5734 }
5735 assert(fn != NULL);
5736
5737 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5738 * by loading the immediate into the scalar parameter.
5739 */
5740 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5741 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
5742 tcg_temp_free_i64(imm);
5743 return true;
5744}
5745
dec6cf6b
RH
5746/*
5747 * Prefetches
5748 */
5749
3a7be554 5750static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5751{
5752 /* Prefetch is a nop within QEMU. */
2f95a3b0 5753 (void)sve_access_check(s);
dec6cf6b
RH
5754 return true;
5755}
5756
3a7be554 5757static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5758{
5759 if (a->rm == 31) {
5760 return false;
5761 }
5762 /* Prefetch is a nop within QEMU. */
2f95a3b0 5763 (void)sve_access_check(s);
dec6cf6b
RH
5764 return true;
5765}
a2103582
RH
5766
5767/*
5768 * Move Prefix
5769 *
5770 * TODO: The implementation so far could handle predicated merging movprfx.
5771 * The helper functions as written take an extra source register to
5772 * use in the operation, but the result is only written when predication
5773 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5774 * to allow the final write back to the destination to be unconditional.
5775 * For predicated zeroing movprfx, we need to rearrange the helpers to
5776 * allow the final write back to zero inactives.
5777 *
5778 * In the meantime, just emit the moves.
5779 */
5780
3a7be554 5781static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
5782{
5783 return do_mov_z(s, a->rd, a->rn);
5784}
5785
3a7be554 5786static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5787{
5788 if (sve_access_check(s)) {
5789 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5790 }
5791 return true;
5792}
5793
3a7be554 5794static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 5795{
60245996 5796 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 5797}