/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

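/*
 * Worked example of the tszimm encoding handled above: for an .s
 * (esz == 2) shift right by 5, the combined tsz:imm3 field holds
 * (16 << 2) - 5 = 59 = 0b0111011.  tszimm_esz() drops the low three
 * bits (imm3) and takes the index of the highest set bit of the
 * remaining tsz field, 31 - clz32(7) = 2, recovering the element
 * size; tszimm_shr() then yields (16 << 2) - 59 = 5.  A tsz field of
 * zero has no set bit, so clz32(0) == 32 and the result is -1, which
 * the callers reject as an unallocated encoding.
 */
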
static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

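/*
 * For example, a 9-bit immediate of 0x1ab has the SH bit (bit 8) set,
 * so expand_imm_sh8u() returns 0xab << 8 = 0xab00, while
 * expand_imm_sh8s() sign-extends first: (int8_t)0xab = -85, giving
 * -85 << 8 = -21760.  With SH clear, the low 8 bits are used unshifted.
 */
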
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

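/*
 * The table above is simply dtype = msz * 5: the values 0, 5, 10 and
 * 15 are the dtype encodings for which the memory element size equals
 * the register element size and the data is loaded unsigned, i.e. the
 * "same size" entries of the contiguous-load dtype table.
 */
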
/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

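/*
 * For example, with a 256-bit vector length the predicate register is
 * 256 / 64 = 4 bytes, which size_for_gvec() rounds up to 8; a 2048-bit
 * vector length gives a 32-byte predicate, already a multiple of 16,
 * so it is returned unchanged.
 */
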
/* Invoke an out-of-line helper on 3 Zregs. */
static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

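/*
 * A typical use of these wrappers passes the helper, the register
 * numbers and a small immediate, e.g. (as used by do_adr below):
 *
 *     gen_gvec_ool_zzz(s, gen_helper_sve_adr_p32, a->rd, a->rn, a->rm, a->imm);
 *
 * The final argument is packed into the simd_desc() descriptor and is
 * recovered by the helper via simd_data(); c.f. tcg/tcg-gvec-desc.h.
 */
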
/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

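/*
 * do_pred_flags() assumes the helper result packs N in bit 31, !Z in
 * bit 1 and C in bit 0.  Copying bit 1 into cpu_ZF gives the right Z
 * (the Z flag is set iff cpu_ZF is zero), bit 0 becomes C directly,
 * and V is always cleared, as PredTest produces V == 0.
 */
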
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

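/*
 * E.g. for 16-bit elements (esz == 1) only every second predicate bit
 * is significant, hence the 0x5555... mask; for 64-bit elements only
 * every eighth bit is, hence 0x0101...  Masking a predicate word with
 * pred_esz_masks[esz] therefore keeps exactly one bit per element.
 */
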
/*
 *** SVE Logical - Unpredicated Group
 */

static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[4] = { \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
    }; \
    return do_zpzz_ool(s, a, fns[a->esz]); \
}

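/*
 * Each DO_ZPZZ(NAME, name) invocation below expands to a translator of
 * the form
 *
 *   static bool trans_AND_zpzz(DisasContext *s, arg_rprr_esz *a)
 *   {
 *       static gen_helper_gvec_4 * const fns[4] = {
 *           gen_helper_sve_and_zpzz_b, ..., gen_helper_sve_and_zpzz_d,
 *       };
 *       return do_zpzz_ool(s, a, fns[a->esz]);
 *   }
 *
 * i.e. one out-of-line helper per element size, selected by a->esz.
 */
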
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    return do_zpz_ool(s, a, fns[a->esz]); \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
{ \
    static gen_helper_gvec_reduc * const fns[4] = { \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    return do_vpz_ool(s, a, fns[a->esz]); \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_4 * const fns[3] = { \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, \
    }; \
    if (a->esz < 0 || a->esz >= 3) { \
        return false; \
    } \
    return do_zpzz_ool(s, a, fns[a->esz]); \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
{ \
    static gen_helper_gvec_3 * const fns[4] = { \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
        gen_helper_sve_##name##_zzw_s, NULL \
    }; \
    return do_zzw_ool(s, a, fns[a->esz]); \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
{ \
    static gen_helper_gvec_5 * const fns[4] = { \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    }; \
    return do_zpzzz_ool(s, a, fns[a->esz]); \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

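/*
 * For example, with a 256-bit vector (fullsz == 32 bytes) and 32-bit
 * elements (esz == 2) there are 8 elements: POW2 and ALL both give 8,
 * VL7 gives 7, MUL3 gives 6, while VL16 gives 0 because fewer than 16
 * elements are available.
 */
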
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

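/*
 * For example, an unsigned decrement (u && d) of reg == 3 by val == 5
 * computes 3 - 5 == -2 in 64-bit arithmetic; since -2 < 0 the movcond
 * above replaces the result with the bound 0.  Likewise a signed
 * increment that exceeds INT32_MAX is clamped to INT32_MAX.
 */
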
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

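/*
 * The signed cases above use the usual xor trick: an addition
 * overflowed iff the operands share a sign that the result does not,
 * and a subtraction overflowed iff the operands differ in sign and the
 * result's sign differs from the minuend's.  Since val is known to be
 * positive, an overflowing addition can only be clamped to INT64_MAX
 * and an overflowing subtraction to INT64_MIN, which is what the
 * final movcond selects.
 */
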
1599/* Similarly with a vector and a scalar operand. */
1600static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1601 TCGv_i64 val, bool u, bool d)
1602{
1603 unsigned vsz = vec_full_reg_size(s);
1604 TCGv_ptr dptr, nptr;
1605 TCGv_i32 t32, desc;
1606 TCGv_i64 t64;
1607
1608 dptr = tcg_temp_new_ptr();
1609 nptr = tcg_temp_new_ptr();
1610 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1611 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1612 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1613
1614 switch (esz) {
1615 case MO_8:
1616 t32 = tcg_temp_new_i32();
1617 tcg_gen_extrl_i64_i32(t32, val);
1618 if (d) {
1619 tcg_gen_neg_i32(t32, t32);
1620 }
1621 if (u) {
1622 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1623 } else {
1624 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1625 }
1626 tcg_temp_free_i32(t32);
1627 break;
1628
1629 case MO_16:
1630 t32 = tcg_temp_new_i32();
1631 tcg_gen_extrl_i64_i32(t32, val);
1632 if (d) {
1633 tcg_gen_neg_i32(t32, t32);
1634 }
1635 if (u) {
1636 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1637 } else {
1638 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1639 }
1640 tcg_temp_free_i32(t32);
1641 break;
1642
1643 case MO_32:
1644 t64 = tcg_temp_new_i64();
1645 if (d) {
1646 tcg_gen_neg_i64(t64, val);
1647 } else {
1648 tcg_gen_mov_i64(t64, val);
1649 }
1650 if (u) {
1651 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1652 } else {
1653 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1654 }
1655 tcg_temp_free_i64(t64);
1656 break;
1657
1658 case MO_64:
1659 if (u) {
1660 if (d) {
1661 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1662 } else {
1663 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1664 }
1665 } else if (d) {
1666 t64 = tcg_temp_new_i64();
1667 tcg_gen_neg_i64(t64, val);
1668 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1669 tcg_temp_free_i64(t64);
1670 } else {
1671 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1672 }
1673 break;
1674
1675 default:
1676 g_assert_not_reached();
1677 }
1678
1679 tcg_temp_free_ptr(dptr);
1680 tcg_temp_free_ptr(nptr);
1681 tcg_temp_free_i32(desc);
1682}
1683
3a7be554 1684static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
24e82e68
RH
1685{
1686 if (sve_access_check(s)) {
1687 unsigned fullsz = vec_full_reg_size(s);
1688 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1689 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1690 }
1691 return true;
1692}
1693
3a7be554 1694static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1695{
1696 if (sve_access_check(s)) {
1697 unsigned fullsz = vec_full_reg_size(s);
1698 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1699 int inc = numelem * a->imm * (a->d ? -1 : 1);
1700 TCGv_i64 reg = cpu_reg(s, a->rd);
1701
1702 tcg_gen_addi_i64(reg, reg, inc);
1703 }
1704 return true;
1705}
1706
3a7be554 1707static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1708{
1709 if (!sve_access_check(s)) {
1710 return true;
1711 }
1712
1713 unsigned fullsz = vec_full_reg_size(s);
1714 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1715 int inc = numelem * a->imm;
1716 TCGv_i64 reg = cpu_reg(s, a->rd);
1717
1718 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1719 if (inc == 0) {
1720 if (a->u) {
1721 tcg_gen_ext32u_i64(reg, reg);
1722 } else {
1723 tcg_gen_ext32s_i64(reg, reg);
1724 }
1725 } else {
1726 TCGv_i64 t = tcg_const_i64(inc);
1727 do_sat_addsub_32(reg, t, a->u, a->d);
1728 tcg_temp_free_i64(t);
1729 }
1730 return true;
1731}
1732
3a7be554 1733static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
24e82e68
RH
1734{
1735 if (!sve_access_check(s)) {
1736 return true;
1737 }
1738
1739 unsigned fullsz = vec_full_reg_size(s);
1740 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1741 int inc = numelem * a->imm;
1742 TCGv_i64 reg = cpu_reg(s, a->rd);
1743
1744 if (inc != 0) {
1745 TCGv_i64 t = tcg_const_i64(inc);
1746 do_sat_addsub_64(reg, t, a->u, a->d);
1747 tcg_temp_free_i64(t);
1748 }
1749 return true;
1750}
1751
3a7be554 1752static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1753{
1754 if (a->esz == 0) {
1755 return false;
1756 }
1757
1758 unsigned fullsz = vec_full_reg_size(s);
1759 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1760 int inc = numelem * a->imm;
1761
1762 if (inc != 0) {
1763 if (sve_access_check(s)) {
1764 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1765 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1766 vec_full_reg_offset(s, a->rn),
1767 t, fullsz, fullsz);
1768 tcg_temp_free_i64(t);
1769 }
1770 } else {
1771 do_mov_z(s, a->rd, a->rn);
1772 }
1773 return true;
1774}
1775
3a7be554 1776static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
24e82e68
RH
1777{
1778 if (a->esz == 0) {
1779 return false;
1780 }
1781
1782 unsigned fullsz = vec_full_reg_size(s);
1783 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1784 int inc = numelem * a->imm;
1785
1786 if (inc != 0) {
1787 if (sve_access_check(s)) {
1788 TCGv_i64 t = tcg_const_i64(inc);
1789 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1790 tcg_temp_free_i64(t);
1791 }
1792 } else {
1793 do_mov_z(s, a->rd, a->rn);
1794 }
1795 return true;
1796}
1797
e1fa1164
RH
1798/*
1799 *** SVE Bitwise Immediate Group
1800 */
1801
1802static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1803{
1804 uint64_t imm;
1805 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1806 extract32(a->dbm, 0, 6),
1807 extract32(a->dbm, 6, 6))) {
1808 return false;
1809 }
1810 if (sve_access_check(s)) {
1811 unsigned vsz = vec_full_reg_size(s);
1812 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1813 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1814 }
1815 return true;
1816}
1817
3a7be554 1818static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1819{
1820 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1821}
1822
3a7be554 1823static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1824{
1825 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1826}
1827
3a7be554 1828static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
e1fa1164
RH
1829{
1830 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1831}
1832
3a7be554 1833static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
e1fa1164
RH
1834{
1835 uint64_t imm;
1836 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1837 extract32(a->dbm, 0, 6),
1838 extract32(a->dbm, 6, 6))) {
1839 return false;
1840 }
1841 if (sve_access_check(s)) {
1842 do_dupi_z(s, a->rd, imm);
1843 }
1844 return true;
1845}
1846
f25a2361
RH
1847/*
1848 *** SVE Integer Wide Immediate - Predicated Group
1849 */
1850
1851/* Implement all merging copies. This is used for CPY (immediate),
1852 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1853 */
1854static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1855 TCGv_i64 val)
1856{
1857 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1858 static gen_cpy * const fns[4] = {
1859 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1860 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1861 };
1862 unsigned vsz = vec_full_reg_size(s);
1863 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1864 TCGv_ptr t_zd = tcg_temp_new_ptr();
1865 TCGv_ptr t_zn = tcg_temp_new_ptr();
1866 TCGv_ptr t_pg = tcg_temp_new_ptr();
1867
1868 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1869 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1870 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1871
1872 fns[esz](t_zd, t_zn, t_pg, val, desc);
1873
1874 tcg_temp_free_ptr(t_zd);
1875 tcg_temp_free_ptr(t_zn);
1876 tcg_temp_free_ptr(t_pg);
1877 tcg_temp_free_i32(desc);
1878}
1879
3a7be554 1880static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
f25a2361
RH
1881{
1882 if (a->esz == 0) {
1883 return false;
1884 }
1885 if (sve_access_check(s)) {
1886 /* Decode the VFP immediate. */
1887 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1888 TCGv_i64 t_imm = tcg_const_i64(imm);
1889 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1890 tcg_temp_free_i64(t_imm);
1891 }
1892 return true;
1893}
1894
3a7be554 1895static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
f25a2361 1896{
3a7be554 1897 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1898 return false;
1899 }
1900 if (sve_access_check(s)) {
1901 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1902 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1903 tcg_temp_free_i64(t_imm);
1904 }
1905 return true;
1906}
1907
3a7be554 1908static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
f25a2361
RH
1909{
1910 static gen_helper_gvec_2i * const fns[4] = {
1911 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1912 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1913 };
1914
3a7be554 1915 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
f25a2361
RH
1916 return false;
1917 }
1918 if (sve_access_check(s)) {
1919 unsigned vsz = vec_full_reg_size(s);
1920 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1921 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1922 pred_full_reg_offset(s, a->pg),
1923 t_imm, vsz, vsz, 0, fns[a->esz]);
1924 tcg_temp_free_i64(t_imm);
1925 }
1926 return true;
1927}
1928
b94f8f60
RH
1929/*
1930 *** SVE Permute Extract Group
1931 */
1932
3a7be554 1933static bool trans_EXT(DisasContext *s, arg_EXT *a)
b94f8f60
RH
1934{
1935 if (!sve_access_check(s)) {
1936 return true;
1937 }
1938
1939 unsigned vsz = vec_full_reg_size(s);
1940 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1941 unsigned n_siz = vsz - n_ofs;
1942 unsigned d = vec_full_reg_offset(s, a->rd);
1943 unsigned n = vec_full_reg_offset(s, a->rn);
1944 unsigned m = vec_full_reg_offset(s, a->rm);
1945
1946 /* Use host vector move insns if we have appropriate sizes
1947 * and no unfortunate overlap.
1948 */
1949 if (m != d
1950 && n_ofs == size_for_gvec(n_ofs)
1951 && n_siz == size_for_gvec(n_siz)
1952 && (d != n || n_siz <= n_ofs)) {
1953 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1954 if (n_ofs != 0) {
1955 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1956 }
1957 } else {
1958 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1959 }
1960 return true;
1961}
1962
30562ab7
RH
1963/*
1964 *** SVE Permute - Unpredicated Group
1965 */
1966
3a7be554 1967static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
1968{
1969 if (sve_access_check(s)) {
1970 unsigned vsz = vec_full_reg_size(s);
1971 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1972 vsz, vsz, cpu_reg_sp(s, a->rn));
1973 }
1974 return true;
1975}
1976
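/* DUP (element): the combined imm field encodes the element size as the
 * position of its lowest set bit, with the element index in the bits
 * above that.  An index beyond the current vector length produces an
 * all-zero result.
 */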
3a7be554 1977static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
1978{
1979 if ((a->imm & 0x1f) == 0) {
1980 return false;
1981 }
1982 if (sve_access_check(s)) {
1983 unsigned vsz = vec_full_reg_size(s);
1984 unsigned dofs = vec_full_reg_offset(s, a->rd);
1985 unsigned esz, index;
1986
1987 esz = ctz32(a->imm);
1988 index = a->imm >> (esz + 1);
1989
1990 if ((index << esz) < vsz) {
1991 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1992 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
1993 } else {
7e17d50e
RH
1994 /*
1995 * While dup_mem handles 128-bit elements, dup_imm does not.
1996 * Thankfully element size doesn't matter for splatting zero.
1997 */
1998 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
1999 }
2000 }
2001 return true;
2002}
2003
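/* INSR: shift Zn up by one element and insert VAL into element 0.
 * Shared by the SIMD&FP scalar and general register forms below.
 */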
2004static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2005{
2006 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2007 static gen_insr * const fns[4] = {
2008 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2009 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2010 };
2011 unsigned vsz = vec_full_reg_size(s);
2012 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2013 TCGv_ptr t_zd = tcg_temp_new_ptr();
2014 TCGv_ptr t_zn = tcg_temp_new_ptr();
2015
2016 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2017 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2018
2019 fns[a->esz](t_zd, t_zn, val, desc);
2020
2021 tcg_temp_free_ptr(t_zd);
2022 tcg_temp_free_ptr(t_zn);
2023 tcg_temp_free_i32(desc);
2024}
2025
3a7be554 2026static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2027{
2028 if (sve_access_check(s)) {
2029 TCGv_i64 t = tcg_temp_new_i64();
2030 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2031 do_insr_i64(s, a, t);
2032 tcg_temp_free_i64(t);
2033 }
2034 return true;
2035}
2036
3a7be554 2037static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2038{
2039 if (sve_access_check(s)) {
2040 do_insr_i64(s, a, cpu_reg(s, a->rm));
2041 }
2042 return true;
2043}
2044
3a7be554 2045static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2046{
2047 static gen_helper_gvec_2 * const fns[4] = {
2048 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2049 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2050 };
2051
2052 if (sve_access_check(s)) {
2053 unsigned vsz = vec_full_reg_size(s);
2054 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2055 vec_full_reg_offset(s, a->rn),
2056 vsz, vsz, 0, fns[a->esz]);
2057 }
2058 return true;
2059}
2060
3a7be554 2061static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2062{
2063 static gen_helper_gvec_3 * const fns[4] = {
2064 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2065 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2066 };
2067
2068 if (sve_access_check(s)) {
e645d1a1 2069 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
30562ab7
RH
2070 }
2071 return true;
2072}
2073
3a7be554 2074static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2075{
2076 static gen_helper_gvec_2 * const fns[4][2] = {
2077 { NULL, NULL },
2078 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2079 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2080 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2081 };
2082
2083 if (a->esz == 0) {
2084 return false;
2085 }
2086 if (sve_access_check(s)) {
2087 unsigned vsz = vec_full_reg_size(s);
2088 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2089 vec_full_reg_offset(s, a->rn)
2090 + (a->h ? vsz / 2 : 0),
2091 vsz, vsz, 0, fns[a->esz][a->u]);
2092 }
2093 return true;
2094}
2095
d731d8cb
RH
2096/*
2097 *** SVE Permute - Predicates Group
2098 */
2099
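/* Expand a three-operand predicate permutation (ZIP, UZP, TRN) via an
 * out-of-line helper.  HIGH_ODD selects the 2/high/odd variant and is
 * passed to the helper in the descriptor alongside the element size.
 */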
2100static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2101 gen_helper_gvec_3 *fn)
2102{
2103 if (!sve_access_check(s)) {
2104 return true;
2105 }
2106
2107 unsigned vsz = pred_full_reg_size(s);
2108
2109 /* Predicate sizes may be smaller and cannot use simd_desc.
2110 We cannot round up, as we do elsewhere, because we need
2111 the exact size for ZIP2 and REV. We retain the style for
2112 the other helpers for consistency. */
2113 TCGv_ptr t_d = tcg_temp_new_ptr();
2114 TCGv_ptr t_n = tcg_temp_new_ptr();
2115 TCGv_ptr t_m = tcg_temp_new_ptr();
2116 TCGv_i32 t_desc;
2117 int desc;
2118
2119 desc = vsz - 2;
2120 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2121 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2122
2123 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2124 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2125 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2126 t_desc = tcg_const_i32(desc);
2127
2128 fn(t_d, t_n, t_m, t_desc);
2129
2130 tcg_temp_free_ptr(t_d);
2131 tcg_temp_free_ptr(t_n);
2132 tcg_temp_free_ptr(t_m);
2133 tcg_temp_free_i32(t_desc);
2134 return true;
2135}
2136
2137static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2138 gen_helper_gvec_2 *fn)
2139{
2140 if (!sve_access_check(s)) {
2141 return true;
2142 }
2143
2144 unsigned vsz = pred_full_reg_size(s);
2145 TCGv_ptr t_d = tcg_temp_new_ptr();
2146 TCGv_ptr t_n = tcg_temp_new_ptr();
2147 TCGv_i32 t_desc;
2148 int desc;
2149
2150 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2151 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2152
2153 /* Predicate sizes may be smaller and cannot use simd_desc.
2154 We cannot round up, as we do elsewhere, because we need
2155 the exact size for ZIP2 and REV. We retain the style for
2156 the other helpers for consistency. */
2157
2158 desc = vsz - 2;
2159 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2160 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2161 t_desc = tcg_const_i32(desc);
2162
2163 fn(t_d, t_n, t_desc);
2164
2165 tcg_temp_free_i32(t_desc);
2166 tcg_temp_free_ptr(t_d);
2167 tcg_temp_free_ptr(t_n);
2168 return true;
2169}
2170
3a7be554 2171static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2172{
2173 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2174}
2175
3a7be554 2176static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2177{
2178 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2179}
2180
3a7be554 2181static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2182{
2183 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2184}
2185
3a7be554 2186static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2187{
2188 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2189}
2190
3a7be554 2191static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2192{
2193 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2194}
2195
3a7be554 2196static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2197{
2198 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2199}
2200
3a7be554 2201static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2202{
2203 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2204}
2205
3a7be554 2206static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2207{
2208 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2209}
2210
3a7be554 2211static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2212{
2213 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2214}
2215
234b48e9
RH
2216/*
2217 *** SVE Permute - Interleaving Group
2218 */
2219
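/* ZIP1/ZIP2: interleave corresponding elements from the low (ZIP1) or
 * high (ZIP2) halves of Zn and Zm.
 */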
2220static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2221{
2222 static gen_helper_gvec_3 * const fns[4] = {
2223 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2224 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2225 };
2226
2227 if (sve_access_check(s)) {
2228 unsigned vsz = vec_full_reg_size(s);
2229 unsigned high_ofs = high ? vsz / 2 : 0;
2230 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2231 vec_full_reg_offset(s, a->rn) + high_ofs,
2232 vec_full_reg_offset(s, a->rm) + high_ofs,
2233 vsz, vsz, 0, fns[a->esz]);
2234 }
2235 return true;
2236}
2237
2238static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2239 gen_helper_gvec_3 *fn)
2240{
2241 if (sve_access_check(s)) {
e645d1a1 2242 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
234b48e9
RH
2243 }
2244 return true;
2245}
2246
3a7be554 2247static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2248{
2249 return do_zip(s, a, false);
2250}
2251
3a7be554 2252static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2253{
2254 return do_zip(s, a, true);
2255}
2256
2257static gen_helper_gvec_3 * const uzp_fns[4] = {
2258 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2259 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2260};
2261
3a7be554 2262static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2263{
2264 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2265}
2266
3a7be554 2267static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2268{
2269 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2270}
2271
2272static gen_helper_gvec_3 * const trn_fns[4] = {
2273 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2274 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2275};
2276
3a7be554 2277static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2278{
2279 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2280}
2281
3a7be554 2282static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2283{
2284 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2285}
2286
3ca879ae
RH
2287/*
2288 *** SVE Permute Vector - Predicated Group
2289 */
2290
3a7be554 2291static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2292{
2293 static gen_helper_gvec_3 * const fns[4] = {
2294 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2295 };
2296 return do_zpz_ool(s, a, fns[a->esz]);
2297}
2298
ef23cb72
RH
2299/* Call the helper that computes the ARM LastActiveElement pseudocode
2300 * function, scaled by the element size. This includes the not found
2301 * indication; e.g. not found for esz=3 is -8.
2302 */
2303static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2304{
2305 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2306 * round up, as we do elsewhere, because we need the exact size.
2307 */
2308 TCGv_ptr t_p = tcg_temp_new_ptr();
2309 TCGv_i32 t_desc;
2310 unsigned vsz = pred_full_reg_size(s);
2311 unsigned desc;
2312
2313 desc = vsz - 2;
2314 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2315
2316 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2317 t_desc = tcg_const_i32(desc);
2318
2319 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2320
2321 tcg_temp_free_i32(t_desc);
2322 tcg_temp_free_ptr(t_p);
2323}
2324
2325/* Increment LAST to the offset of the next element in the vector,
2326 * wrapping around to 0.
2327 */
2328static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2329{
2330 unsigned vsz = vec_full_reg_size(s);
2331
2332 tcg_gen_addi_i32(last, last, 1 << esz);
2333 if (is_power_of_2(vsz)) {
2334 tcg_gen_andi_i32(last, last, vsz - 1);
2335 } else {
2336 TCGv_i32 max = tcg_const_i32(vsz);
2337 TCGv_i32 zero = tcg_const_i32(0);
2338 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2339 tcg_temp_free_i32(max);
2340 tcg_temp_free_i32(zero);
2341 }
2342}
2343
2344/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2345static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2346{
2347 unsigned vsz = vec_full_reg_size(s);
2348
2349 if (is_power_of_2(vsz)) {
2350 tcg_gen_andi_i32(last, last, vsz - 1);
2351 } else {
2352 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2353 TCGv_i32 zero = tcg_const_i32(0);
2354 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2355 tcg_temp_free_i32(max);
2356 tcg_temp_free_i32(zero);
2357 }
2358}
2359
2360/* Load an unsigned element of ESZ from BASE+OFS. */
2361static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2362{
2363 TCGv_i64 r = tcg_temp_new_i64();
2364
2365 switch (esz) {
2366 case 0:
2367 tcg_gen_ld8u_i64(r, base, ofs);
2368 break;
2369 case 1:
2370 tcg_gen_ld16u_i64(r, base, ofs);
2371 break;
2372 case 2:
2373 tcg_gen_ld32u_i64(r, base, ofs);
2374 break;
2375 case 3:
2376 tcg_gen_ld_i64(r, base, ofs);
2377 break;
2378 default:
2379 g_assert_not_reached();
2380 }
2381 return r;
2382}
2383
2384/* Load an unsigned element of ESZ from RM[LAST]. */
2385static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2386 int rm, int esz)
2387{
2388 TCGv_ptr p = tcg_temp_new_ptr();
2389 TCGv_i64 r;
2390
2391 /* Convert the offset within the vector into an offset within ENV.
2392 * The final adjustment for the vector register base
2393 * is added to the load as a constant offset.
2394 */
2395#ifdef HOST_WORDS_BIGENDIAN
2396 /* Adjust for element ordering. See vec_reg_offset. */
2397 if (esz < 3) {
2398 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2399 }
2400#endif
2401 tcg_gen_ext_i32_ptr(p, last);
2402 tcg_gen_add_ptr(p, p, cpu_env);
2403
2404 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2405 tcg_temp_free_ptr(p);
2406
2407 return r;
2408}
2409
2410/* Compute CLAST for a Zreg. */
2411static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2412{
2413 TCGv_i32 last;
2414 TCGLabel *over;
2415 TCGv_i64 ele;
2416 unsigned vsz, esz = a->esz;
2417
2418 if (!sve_access_check(s)) {
2419 return true;
2420 }
2421
2422 last = tcg_temp_local_new_i32();
2423 over = gen_new_label();
2424
2425 find_last_active(s, last, esz, a->pg);
2426
2427 /* There is of course no movcond for a 2048-bit vector,
2428 * so we must branch over the actual store.
2429 */
2430 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2431
2432 if (!before) {
2433 incr_last_active(s, last, esz);
2434 }
2435
2436 ele = load_last_active(s, last, a->rm, esz);
2437 tcg_temp_free_i32(last);
2438
2439 vsz = vec_full_reg_size(s);
2440 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2441 tcg_temp_free_i64(ele);
2442
2443 /* If this insn used MOVPRFX, we may need a second move. */
2444 if (a->rd != a->rn) {
2445 TCGLabel *done = gen_new_label();
2446 tcg_gen_br(done);
2447
2448 gen_set_label(over);
2449 do_mov_z(s, a->rd, a->rn);
2450
2451 gen_set_label(done);
2452 } else {
2453 gen_set_label(over);
2454 }
2455 return true;
2456}
2457
3a7be554 2458static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2459{
2460 return do_clast_vector(s, a, false);
2461}
2462
3a7be554 2463static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2464{
2465 return do_clast_vector(s, a, true);
2466}
2467
2468/* Compute CLAST for a scalar. */
2469static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2470 bool before, TCGv_i64 reg_val)
2471{
2472 TCGv_i32 last = tcg_temp_new_i32();
2473 TCGv_i64 ele, cmp, zero;
2474
2475 find_last_active(s, last, esz, pg);
2476
2477 /* Extend the original value of last prior to incrementing. */
2478 cmp = tcg_temp_new_i64();
2479 tcg_gen_ext_i32_i64(cmp, last);
2480
2481 if (!before) {
2482 incr_last_active(s, last, esz);
2483 }
2484
2485 /* The conceit here is that while last < 0 indicates not found, after
2486 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2487 * from which we can load garbage. We then discard the garbage with
2488 * a conditional move.
2489 */
2490 ele = load_last_active(s, last, rm, esz);
2491 tcg_temp_free_i32(last);
2492
2493 zero = tcg_const_i64(0);
2494 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2495
2496 tcg_temp_free_i64(zero);
2497 tcg_temp_free_i64(cmp);
2498 tcg_temp_free_i64(ele);
2499}
2500
2501/* Compute CLAST for a Vreg. */
2502static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2503{
2504 if (sve_access_check(s)) {
2505 int esz = a->esz;
2506 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2507 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2508
2509 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2510 write_fp_dreg(s, a->rd, reg);
2511 tcg_temp_free_i64(reg);
2512 }
2513 return true;
2514}
2515
3a7be554 2516static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2517{
2518 return do_clast_fp(s, a, false);
2519}
2520
3a7be554 2521static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2522{
2523 return do_clast_fp(s, a, true);
2524}
2525
2526/* Compute CLAST for a Xreg. */
2527static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2528{
2529 TCGv_i64 reg;
2530
2531 if (!sve_access_check(s)) {
2532 return true;
2533 }
2534
2535 reg = cpu_reg(s, a->rd);
2536 switch (a->esz) {
2537 case 0:
2538 tcg_gen_ext8u_i64(reg, reg);
2539 break;
2540 case 1:
2541 tcg_gen_ext16u_i64(reg, reg);
2542 break;
2543 case 2:
2544 tcg_gen_ext32u_i64(reg, reg);
2545 break;
2546 case 3:
2547 break;
2548 default:
2549 g_assert_not_reached();
2550 }
2551
2552 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2553 return true;
2554}
2555
3a7be554 2556static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2557{
2558 return do_clast_general(s, a, false);
2559}
2560
3a7be554 2561static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2562{
2563 return do_clast_general(s, a, true);
2564}
2565
2566/* Compute LAST for a scalar. */
2567static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2568 int pg, int rm, bool before)
2569{
2570 TCGv_i32 last = tcg_temp_new_i32();
2571 TCGv_i64 ret;
2572
2573 find_last_active(s, last, esz, pg);
2574 if (before) {
2575 wrap_last_active(s, last, esz);
2576 } else {
2577 incr_last_active(s, last, esz);
2578 }
2579
2580 ret = load_last_active(s, last, rm, esz);
2581 tcg_temp_free_i32(last);
2582 return ret;
2583}
2584
2585/* Compute LAST for a Vreg. */
2586static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2587{
2588 if (sve_access_check(s)) {
2589 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2590 write_fp_dreg(s, a->rd, val);
2591 tcg_temp_free_i64(val);
2592 }
2593 return true;
2594}
2595
3a7be554 2596static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2597{
2598 return do_last_fp(s, a, false);
2599}
2600
3a7be554 2601static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2602{
2603 return do_last_fp(s, a, true);
2604}
2605
2606/* Compute LAST for a Xreg. */
2607static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2608{
2609 if (sve_access_check(s)) {
2610 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2611 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2612 tcg_temp_free_i64(val);
2613 }
2614 return true;
2615}
2616
3a7be554 2617static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2618{
2619 return do_last_general(s, a, false);
2620}
2621
3a7be554 2622static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2623{
2624 return do_last_general(s, a, true);
2625}
2626
3a7be554 2627static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2628{
2629 if (sve_access_check(s)) {
2630 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2631 }
2632 return true;
2633}
2634
3a7be554 2635static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2636{
2637 if (sve_access_check(s)) {
2638 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2639 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2640 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2641 tcg_temp_free_i64(t);
2642 }
2643 return true;
2644}
2645
3a7be554 2646static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2647{
2648 static gen_helper_gvec_3 * const fns[4] = {
2649 NULL,
2650 gen_helper_sve_revb_h,
2651 gen_helper_sve_revb_s,
2652 gen_helper_sve_revb_d,
2653 };
2654 return do_zpz_ool(s, a, fns[a->esz]);
2655}
2656
3a7be554 2657static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2658{
2659 static gen_helper_gvec_3 * const fns[4] = {
2660 NULL,
2661 NULL,
2662 gen_helper_sve_revh_s,
2663 gen_helper_sve_revh_d,
2664 };
2665 return do_zpz_ool(s, a, fns[a->esz]);
2666}
2667
3a7be554 2668static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2669{
2670 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2671}
2672
3a7be554 2673static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2674{
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 gen_helper_sve_rbit_b,
2677 gen_helper_sve_rbit_h,
2678 gen_helper_sve_rbit_s,
2679 gen_helper_sve_rbit_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682}
2683
3a7be554 2684static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2685{
2686 if (sve_access_check(s)) {
36cbb7a8
RH
2687 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2688 a->rd, a->rn, a->rm, a->pg, 0);
b48ff240
RH
2689 }
2690 return true;
2691}
2692
757f9cff
RH
2693/*
2694 *** SVE Integer Compare - Vectors Group
2695 */
2696
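/* Expand a predicated integer compare of two vectors via an out-of-line
 * helper that writes the result predicate and returns NZCV in a 32-bit
 * value, which do_pred_flags then copies into the PSTATE flags.
 */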
2697static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2698 gen_helper_gvec_flags_4 *gen_fn)
2699{
2700 TCGv_ptr pd, zn, zm, pg;
2701 unsigned vsz;
2702 TCGv_i32 t;
2703
2704 if (gen_fn == NULL) {
2705 return false;
2706 }
2707 if (!sve_access_check(s)) {
2708 return true;
2709 }
2710
2711 vsz = vec_full_reg_size(s);
2712 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2713 pd = tcg_temp_new_ptr();
2714 zn = tcg_temp_new_ptr();
2715 zm = tcg_temp_new_ptr();
2716 pg = tcg_temp_new_ptr();
2717
2718 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2719 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2720 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2721 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2722
2723 gen_fn(t, pd, zn, zm, pg, t);
2724
2725 tcg_temp_free_ptr(pd);
2726 tcg_temp_free_ptr(zn);
2727 tcg_temp_free_ptr(zm);
2728 tcg_temp_free_ptr(pg);
2729
2730 do_pred_flags(t);
2731
2732 tcg_temp_free_i32(t);
2733 return true;
2734}
2735
2736#define DO_PPZZ(NAME, name) \
3a7be554 2737static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2738{ \
2739 static gen_helper_gvec_flags_4 * const fns[4] = { \
2740 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2741 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2742 }; \
2743 return do_ppzz_flags(s, a, fns[a->esz]); \
2744}
2745
2746DO_PPZZ(CMPEQ, cmpeq)
2747DO_PPZZ(CMPNE, cmpne)
2748DO_PPZZ(CMPGT, cmpgt)
2749DO_PPZZ(CMPGE, cmpge)
2750DO_PPZZ(CMPHI, cmphi)
2751DO_PPZZ(CMPHS, cmphs)
2752
2753#undef DO_PPZZ
2754
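/* The "wide elements" compares match each element of Zn against the
 * overlapping 64-bit element of Zm; they exist only for byte, halfword
 * and word element sizes, hence the NULL doubleword entry below.
 */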
2755#define DO_PPZW(NAME, name) \
3a7be554 2756static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2757{ \
2758 static gen_helper_gvec_flags_4 * const fns[4] = { \
2759 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2760 gen_helper_sve_##name##_ppzw_s, NULL \
2761 }; \
2762 return do_ppzz_flags(s, a, fns[a->esz]); \
2763}
2764
2765DO_PPZW(CMPEQ, cmpeq)
2766DO_PPZW(CMPNE, cmpne)
2767DO_PPZW(CMPGT, cmpgt)
2768DO_PPZW(CMPGE, cmpge)
2769DO_PPZW(CMPHI, cmphi)
2770DO_PPZW(CMPHS, cmphs)
2771DO_PPZW(CMPLT, cmplt)
2772DO_PPZW(CMPLE, cmple)
2773DO_PPZW(CMPLO, cmplo)
2774DO_PPZW(CMPLS, cmpls)
2775
2776#undef DO_PPZW
2777
38cadeba
RH
2778/*
2779 *** SVE Integer Compare - Immediate Groups
2780 */
2781
2782static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2783 gen_helper_gvec_flags_3 *gen_fn)
2784{
2785 TCGv_ptr pd, zn, pg;
2786 unsigned vsz;
2787 TCGv_i32 t;
2788
2789 if (gen_fn == NULL) {
2790 return false;
2791 }
2792 if (!sve_access_check(s)) {
2793 return true;
2794 }
2795
2796 vsz = vec_full_reg_size(s);
2797 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2798 pd = tcg_temp_new_ptr();
2799 zn = tcg_temp_new_ptr();
2800 pg = tcg_temp_new_ptr();
2801
2802 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2803 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2804 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2805
2806 gen_fn(t, pd, zn, pg, t);
2807
2808 tcg_temp_free_ptr(pd);
2809 tcg_temp_free_ptr(zn);
2810 tcg_temp_free_ptr(pg);
2811
2812 do_pred_flags(t);
2813
2814 tcg_temp_free_i32(t);
2815 return true;
2816}
2817
2818#define DO_PPZI(NAME, name) \
3a7be554 2819static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
2820{ \
2821 static gen_helper_gvec_flags_3 * const fns[4] = { \
2822 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2823 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2824 }; \
2825 return do_ppzi_flags(s, a, fns[a->esz]); \
2826}
2827
2828DO_PPZI(CMPEQ, cmpeq)
2829DO_PPZI(CMPNE, cmpne)
2830DO_PPZI(CMPGT, cmpgt)
2831DO_PPZI(CMPGE, cmpge)
2832DO_PPZI(CMPHI, cmphi)
2833DO_PPZI(CMPHS, cmphs)
2834DO_PPZI(CMPLT, cmplt)
2835DO_PPZI(CMPLE, cmple)
2836DO_PPZI(CMPLO, cmplo)
2837DO_PPZI(CMPLS, cmpls)
2838
2839#undef DO_PPZI
2840
35da316f
RH
2841/*
2842 *** SVE Partition Break Group
2843 */
2844
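/* Expand the three-operand partition break instructions (BRKPA, BRKPB).
 * When the S bit is set, use the flag-setting helper and update NZCV.
 */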
2845static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2846 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2847{
2848 if (!sve_access_check(s)) {
2849 return true;
2850 }
2851
2852 unsigned vsz = pred_full_reg_size(s);
2853
2854 /* Predicate sizes may be smaller and cannot use simd_desc. */
2855 TCGv_ptr d = tcg_temp_new_ptr();
2856 TCGv_ptr n = tcg_temp_new_ptr();
2857 TCGv_ptr m = tcg_temp_new_ptr();
2858 TCGv_ptr g = tcg_temp_new_ptr();
2859 TCGv_i32 t = tcg_const_i32(vsz - 2);
2860
2861 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2862 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2863 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2864 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2865
2866 if (a->s) {
2867 fn_s(t, d, n, m, g, t);
2868 do_pred_flags(t);
2869 } else {
2870 fn(d, n, m, g, t);
2871 }
2872 tcg_temp_free_ptr(d);
2873 tcg_temp_free_ptr(n);
2874 tcg_temp_free_ptr(m);
2875 tcg_temp_free_ptr(g);
2876 tcg_temp_free_i32(t);
2877 return true;
2878}
2879
2880static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2881 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2882{
2883 if (!sve_access_check(s)) {
2884 return true;
2885 }
2886
2887 unsigned vsz = pred_full_reg_size(s);
2888
2889 /* Predicate sizes may be smaller and cannot use simd_desc. */
2890 TCGv_ptr d = tcg_temp_new_ptr();
2891 TCGv_ptr n = tcg_temp_new_ptr();
2892 TCGv_ptr g = tcg_temp_new_ptr();
2893 TCGv_i32 t = tcg_const_i32(vsz - 2);
2894
2895 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2896 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2897 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2898
2899 if (a->s) {
2900 fn_s(t, d, n, g, t);
2901 do_pred_flags(t);
2902 } else {
2903 fn(d, n, g, t);
2904 }
2905 tcg_temp_free_ptr(d);
2906 tcg_temp_free_ptr(n);
2907 tcg_temp_free_ptr(g);
2908 tcg_temp_free_i32(t);
2909 return true;
2910}
2911
3a7be554 2912static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2913{
2914 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2915}
2916
3a7be554 2917static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2918{
2919 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2920}
2921
3a7be554 2922static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2923{
2924 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2925}
2926
3a7be554 2927static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2928{
2929 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2930}
2931
3a7be554 2932static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2933{
2934 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2935}
2936
3a7be554 2937static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2938{
2939 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2940}
2941
3a7be554 2942static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2943{
2944 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2945}
2946
9ee3a611
RH
2947/*
2948 *** SVE Predicate Count Group
2949 */
2950
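/* Count the active elements of size ESZ in predicate PN, gated by PG,
 * into VAL.  For predicates of at most 8 bytes this is done inline:
 * only one predicate bit per element is significant, so masking with
 * pred_esz_masks[esz] (e.g. 0x1111...1111 for word elements) followed
 * by a popcount is enough.  Larger predicates use the helper.
 */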
2951static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2952{
2953 unsigned psz = pred_full_reg_size(s);
2954
2955 if (psz <= 8) {
2956 uint64_t psz_mask;
2957
2958 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2959 if (pn != pg) {
2960 TCGv_i64 g = tcg_temp_new_i64();
2961 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2962 tcg_gen_and_i64(val, val, g);
2963 tcg_temp_free_i64(g);
2964 }
2965
2966 /* Reduce the pred_esz_masks value simply to reduce the
2967 * size of the code generated here.
2968 */
2969 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2970 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2971
2972 tcg_gen_ctpop_i64(val, val);
2973 } else {
2974 TCGv_ptr t_pn = tcg_temp_new_ptr();
2975 TCGv_ptr t_pg = tcg_temp_new_ptr();
2976 unsigned desc;
2977 TCGv_i32 t_desc;
2978
2979 desc = psz - 2;
2980 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2981
2982 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2983 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2984 t_desc = tcg_const_i32(desc);
2985
2986 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2987 tcg_temp_free_ptr(t_pn);
2988 tcg_temp_free_ptr(t_pg);
2989 tcg_temp_free_i32(t_desc);
2990 }
2991}
2992
3a7be554 2993static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
2994{
2995 if (sve_access_check(s)) {
2996 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2997 }
2998 return true;
2999}
3000
3a7be554 3001static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3002{
3003 if (sve_access_check(s)) {
3004 TCGv_i64 reg = cpu_reg(s, a->rd);
3005 TCGv_i64 val = tcg_temp_new_i64();
3006
3007 do_cntp(s, val, a->esz, a->pg, a->pg);
3008 if (a->d) {
3009 tcg_gen_sub_i64(reg, reg, val);
3010 } else {
3011 tcg_gen_add_i64(reg, reg, val);
3012 }
3013 tcg_temp_free_i64(val);
3014 }
3015 return true;
3016}
3017
3a7be554 3018static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3019{
3020 if (a->esz == 0) {
3021 return false;
3022 }
3023 if (sve_access_check(s)) {
3024 unsigned vsz = vec_full_reg_size(s);
3025 TCGv_i64 val = tcg_temp_new_i64();
3026 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3027
3028 do_cntp(s, val, a->esz, a->pg, a->pg);
3029 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3030 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3031 }
3032 return true;
3033}
3034
3a7be554 3035static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3036{
3037 if (sve_access_check(s)) {
3038 TCGv_i64 reg = cpu_reg(s, a->rd);
3039 TCGv_i64 val = tcg_temp_new_i64();
3040
3041 do_cntp(s, val, a->esz, a->pg, a->pg);
3042 do_sat_addsub_32(reg, val, a->u, a->d);
3043 }
3044 return true;
3045}
3046
3a7be554 3047static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3048{
3049 if (sve_access_check(s)) {
3050 TCGv_i64 reg = cpu_reg(s, a->rd);
3051 TCGv_i64 val = tcg_temp_new_i64();
3052
3053 do_cntp(s, val, a->esz, a->pg, a->pg);
3054 do_sat_addsub_64(reg, val, a->u, a->d);
3055 }
3056 return true;
3057}
3058
3a7be554 3059static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3060{
3061 if (a->esz == 0) {
3062 return false;
3063 }
3064 if (sve_access_check(s)) {
3065 TCGv_i64 val = tcg_temp_new_i64();
3066 do_cntp(s, val, a->esz, a->pg, a->pg);
3067 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3068 }
3069 return true;
3070}
3071
caf1cefc
RH
3072/*
3073 *** SVE Integer Compare Scalars Group
3074 */
3075
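/* CTERMEQ/CTERMNE: compare two general registers for loop termination.
 * NF is set to the comparison result, VF to !NF & !CF, while ZF and CF
 * keep their values from the previous flag-setting instruction.
 */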
3a7be554 3076static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3077{
3078 if (!sve_access_check(s)) {
3079 return true;
3080 }
3081
3082 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3083 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3084 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3085 TCGv_i64 cmp = tcg_temp_new_i64();
3086
3087 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3088 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3089 tcg_temp_free_i64(cmp);
3090
3091 /* VF = !NF & !CF. */
3092 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3093 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3094
3095 /* Both NF and VF actually look at bit 31. */
3096 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3097 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3098 return true;
3099}
3100
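/* WHILE: compute how many leading elements satisfy the scalar
 * comparison, bound that count by the number of elements in the vector,
 * scale it to predicate bits, and let the helper construct the result
 * predicate and the NZCV flags.
 */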
3a7be554 3101static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3102{
bbd0968c 3103 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3104 TCGv_i32 t2, t3;
3105 TCGv_ptr ptr;
3106 unsigned desc, vsz = vec_full_reg_size(s);
3107 TCGCond cond;
3108
bbd0968c
RH
3109 if (!sve_access_check(s)) {
3110 return true;
3111 }
3112
3113 op0 = read_cpu_reg(s, a->rn, 1);
3114 op1 = read_cpu_reg(s, a->rm, 1);
3115
caf1cefc
RH
3116 if (!a->sf) {
3117 if (a->u) {
3118 tcg_gen_ext32u_i64(op0, op0);
3119 tcg_gen_ext32u_i64(op1, op1);
3120 } else {
3121 tcg_gen_ext32s_i64(op0, op0);
3122 tcg_gen_ext32s_i64(op1, op1);
3123 }
3124 }
3125
3126 /* For the helper, compress the different conditions into a computation
3127 * of the number of iterations for which the condition is true.
caf1cefc 3128 */
bbd0968c
RH
3129 t0 = tcg_temp_new_i64();
3130 t1 = tcg_temp_new_i64();
caf1cefc
RH
3131 tcg_gen_sub_i64(t0, op1, op0);
3132
bbd0968c 3133 tmax = tcg_const_i64(vsz >> a->esz);
caf1cefc
RH
3134 if (a->eq) {
3135 /* Equality means one more iteration. */
3136 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c
RH
3137
3138 /* If op1 is the maximum (un)signed integer (the only case in which
3139 * the addition above can overflow), then we produce an all-true
3140 * predicate by setting the count to the vector length. This is
3141 * because the pseudocode is described as an increment + compare
3142 * loop, and the maximum integer would always compare true.
3143 */
3144 tcg_gen_movi_i64(t1, (a->sf
3145 ? (a->u ? UINT64_MAX : INT64_MAX)
3146 : (a->u ? UINT32_MAX : INT32_MAX)));
3147 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3148 }
3149
bbd0968c
RH
3150 /* Bound to the maximum. */
3151 tcg_gen_umin_i64(t0, t0, tmax);
3152 tcg_temp_free_i64(tmax);
3153
3154 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3155 cond = (a->u
3156 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3157 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3158 tcg_gen_movi_i64(t1, 0);
3159 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3160 tcg_temp_free_i64(t1);
caf1cefc 3161
bbd0968c 3162 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3163 t2 = tcg_temp_new_i32();
3164 tcg_gen_extrl_i64_i32(t2, t0);
3165 tcg_temp_free_i64(t0);
bbd0968c
RH
3166
3167 /* Scale elements to bits. */
3168 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc
RH
3169
3170 desc = (vsz / 8) - 2;
3171 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3172 t3 = tcg_const_i32(desc);
3173
3174 ptr = tcg_temp_new_ptr();
3175 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3176
3177 gen_helper_sve_while(t2, ptr, t2, t3);
3178 do_pred_flags(t2);
3179
3180 tcg_temp_free_ptr(ptr);
3181 tcg_temp_free_i32(t2);
3182 tcg_temp_free_i32(t3);
3183 return true;
3184}
3185
ed491961
RH
3186/*
3187 *** SVE Integer Wide Immediate - Unpredicated Group
3188 */
3189
3a7be554 3190static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3191{
3192 if (a->esz == 0) {
3193 return false;
3194 }
3195 if (sve_access_check(s)) {
3196 unsigned vsz = vec_full_reg_size(s);
3197 int dofs = vec_full_reg_offset(s, a->rd);
3198 uint64_t imm;
3199
3200 /* Decode the VFP immediate. */
3201 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3202 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3203 }
3204 return true;
3205}
3206
3a7be554 3207static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3208{
3a7be554 3209 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3210 return false;
3211 }
3212 if (sve_access_check(s)) {
3213 unsigned vsz = vec_full_reg_size(s);
3214 int dofs = vec_full_reg_offset(s, a->rd);
3215
8711e71f 3216 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3217 }
3218 return true;
3219}
3220
3a7be554 3221static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3222{
3a7be554 3223 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3224 return false;
3225 }
3226 if (sve_access_check(s)) {
3227 unsigned vsz = vec_full_reg_size(s);
3228 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3229 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3230 }
3231 return true;
3232}
3233
3a7be554 3234static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3235{
3236 a->imm = -a->imm;
3a7be554 3237 return trans_ADD_zzi(s, a);
6e6a157d
RH
3238}
3239
3a7be554 3240static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3241{
53229a77 3242 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3243 static const GVecGen2s op[4] = {
3244 { .fni8 = tcg_gen_vec_sub8_i64,
3245 .fniv = tcg_gen_sub_vec,
3246 .fno = gen_helper_sve_subri_b,
53229a77 3247 .opt_opc = vecop_list,
6e6a157d
RH
3248 .vece = MO_8,
3249 .scalar_first = true },
3250 { .fni8 = tcg_gen_vec_sub16_i64,
3251 .fniv = tcg_gen_sub_vec,
3252 .fno = gen_helper_sve_subri_h,
53229a77 3253 .opt_opc = vecop_list,
6e6a157d
RH
3254 .vece = MO_16,
3255 .scalar_first = true },
3256 { .fni4 = tcg_gen_sub_i32,
3257 .fniv = tcg_gen_sub_vec,
3258 .fno = gen_helper_sve_subri_s,
53229a77 3259 .opt_opc = vecop_list,
6e6a157d
RH
3260 .vece = MO_32,
3261 .scalar_first = true },
3262 { .fni8 = tcg_gen_sub_i64,
3263 .fniv = tcg_gen_sub_vec,
3264 .fno = gen_helper_sve_subri_d,
53229a77 3265 .opt_opc = vecop_list,
6e6a157d
RH
3266 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3267 .vece = MO_64,
3268 .scalar_first = true }
3269 };
3270
3a7be554 3271 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3272 return false;
3273 }
3274 if (sve_access_check(s)) {
3275 unsigned vsz = vec_full_reg_size(s);
3276 TCGv_i64 c = tcg_const_i64(a->imm);
3277 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3278 vec_full_reg_offset(s, a->rn),
3279 vsz, vsz, c, &op[a->esz]);
3280 tcg_temp_free_i64(c);
3281 }
3282 return true;
3283}
3284
3a7be554 3285static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3286{
3287 if (sve_access_check(s)) {
3288 unsigned vsz = vec_full_reg_size(s);
3289 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3290 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3291 }
3292 return true;
3293}
3294
3a7be554 3295static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3296{
3a7be554 3297 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3298 return false;
3299 }
3300 if (sve_access_check(s)) {
3301 TCGv_i64 val = tcg_const_i64(a->imm);
3302 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3303 tcg_temp_free_i64(val);
3304 }
3305 return true;
3306}
3307
3a7be554 3308static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3309{
3a7be554 3310 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3311}
3312
3a7be554 3313static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3314{
3a7be554 3315 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3316}
3317
3a7be554 3318static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3319{
3a7be554 3320 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3321}
3322
3a7be554 3323static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3324{
3a7be554 3325 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3326}
3327
3328static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3329{
3330 if (sve_access_check(s)) {
3331 unsigned vsz = vec_full_reg_size(s);
3332 TCGv_i64 c = tcg_const_i64(a->imm);
3333
3334 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3335 vec_full_reg_offset(s, a->rn),
3336 c, vsz, vsz, 0, fn);
3337 tcg_temp_free_i64(c);
3338 }
3339 return true;
3340}
3341
3342#define DO_ZZI(NAME, name) \
3a7be554 3343static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3344{ \
3345 static gen_helper_gvec_2i * const fns[4] = { \
3346 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3347 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3348 }; \
3349 return do_zzi_ool(s, a, fns[a->esz]); \
3350}
3351
3352DO_ZZI(SMAX, smax)
3353DO_ZZI(UMAX, umax)
3354DO_ZZI(SMIN, smin)
3355DO_ZZI(UMIN, umin)
3356
3357#undef DO_ZZI
3358
3a7be554 3359static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3360{
3361 static gen_helper_gvec_3 * const fns[2][2] = {
3362 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3363 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3364 };
3365
3366 if (sve_access_check(s)) {
e645d1a1 3367 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, 0);
d730ecaa
RH
3368 }
3369 return true;
3370}
3371
3a7be554 3372static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3373{
3374 static gen_helper_gvec_3 * const fns[2][2] = {
3375 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3376 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3377 };
3378
3379 if (sve_access_check(s)) {
e645d1a1 3380 gen_gvec_ool_zzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->index);
16fcfdc7
RH
3381 }
3382 return true;
3383}
3384
3385
ca40a6e6
RH
3386/*
3387 *** SVE Floating Point Multiply-Add Indexed Group
3388 */
3389
3a7be554 3390static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3391{
3392 static gen_helper_gvec_4_ptr * const fns[3] = {
3393 gen_helper_gvec_fmla_idx_h,
3394 gen_helper_gvec_fmla_idx_s,
3395 gen_helper_gvec_fmla_idx_d,
3396 };
3397
3398 if (sve_access_check(s)) {
3399 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3400 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3401 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3402 vec_full_reg_offset(s, a->rn),
3403 vec_full_reg_offset(s, a->rm),
3404 vec_full_reg_offset(s, a->ra),
3405 status, vsz, vsz, (a->index << 1) | a->sub,
3406 fns[a->esz - 1]);
3407 tcg_temp_free_ptr(status);
3408 }
3409 return true;
3410}
3411
3412/*
3413 *** SVE Floating Point Multiply Indexed Group
3414 */
3415
3a7be554 3416static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3417{
3418 static gen_helper_gvec_3_ptr * const fns[3] = {
3419 gen_helper_gvec_fmul_idx_h,
3420 gen_helper_gvec_fmul_idx_s,
3421 gen_helper_gvec_fmul_idx_d,
3422 };
3423
3424 if (sve_access_check(s)) {
3425 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3426 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ca40a6e6
RH
3427 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3428 vec_full_reg_offset(s, a->rn),
3429 vec_full_reg_offset(s, a->rm),
3430 status, vsz, vsz, a->index, fns[a->esz - 1]);
3431 tcg_temp_free_ptr(status);
3432 }
3433 return true;
3434}
3435
23fbe79f
RH
3436/*
3437 *** SVE Floating Point Fast Reduction Group
3438 */
3439
3440typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3441 TCGv_ptr, TCGv_i32);
3442
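/* Expand a predicated floating-point horizontal reduction.  The
 * descriptor carries both the real vector size and its round-up to a
 * power of two for the helper's benefit; the scalar result is written
 * to the destination via write_fp_dreg.
 */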
3443static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3444 gen_helper_fp_reduce *fn)
3445{
3446 unsigned vsz = vec_full_reg_size(s);
3447 unsigned p2vsz = pow2ceil(vsz);
3448 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3449 TCGv_ptr t_zn, t_pg, status;
3450 TCGv_i64 temp;
3451
3452 temp = tcg_temp_new_i64();
3453 t_zn = tcg_temp_new_ptr();
3454 t_pg = tcg_temp_new_ptr();
3455
3456 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3457 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3458 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
23fbe79f
RH
3459
3460 fn(temp, t_zn, t_pg, status, t_desc);
3461 tcg_temp_free_ptr(t_zn);
3462 tcg_temp_free_ptr(t_pg);
3463 tcg_temp_free_ptr(status);
3464 tcg_temp_free_i32(t_desc);
3465
3466 write_fp_dreg(s, a->rd, temp);
3467 tcg_temp_free_i64(temp);
3468}
3469
3470#define DO_VPZ(NAME, name) \
3a7be554 3471static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3472{ \
3473 static gen_helper_fp_reduce * const fns[3] = { \
3474 gen_helper_sve_##name##_h, \
3475 gen_helper_sve_##name##_s, \
3476 gen_helper_sve_##name##_d, \
3477 }; \
3478 if (a->esz == 0) { \
3479 return false; \
3480 } \
3481 if (sve_access_check(s)) { \
3482 do_reduce(s, a, fns[a->esz - 1]); \
3483 } \
3484 return true; \
3485}
3486
3487DO_VPZ(FADDV, faddv)
3488DO_VPZ(FMINNMV, fminnmv)
3489DO_VPZ(FMAXNMV, fmaxnmv)
3490DO_VPZ(FMINV, fminv)
3491DO_VPZ(FMAXV, fmaxv)
3492
3887c038
RH
3493/*
3494 *** SVE Floating Point Unary Operations - Unpredicated Group
3495 */
3496
3497static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3498{
3499 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3500 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3887c038
RH
3501
3502 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3503 vec_full_reg_offset(s, a->rn),
3504 status, vsz, vsz, 0, fn);
3505 tcg_temp_free_ptr(status);
3506}
3507
3a7be554 3508static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3509{
3510 static gen_helper_gvec_2_ptr * const fns[3] = {
3511 gen_helper_gvec_frecpe_h,
3512 gen_helper_gvec_frecpe_s,
3513 gen_helper_gvec_frecpe_d,
3514 };
3515 if (a->esz == 0) {
3516 return false;
3517 }
3518 if (sve_access_check(s)) {
3519 do_zz_fp(s, a, fns[a->esz - 1]);
3520 }
3521 return true;
3522}
3523
3a7be554 3524static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3525{
3526 static gen_helper_gvec_2_ptr * const fns[3] = {
3527 gen_helper_gvec_frsqrte_h,
3528 gen_helper_gvec_frsqrte_s,
3529 gen_helper_gvec_frsqrte_d,
3530 };
3531 if (a->esz == 0) {
3532 return false;
3533 }
3534 if (sve_access_check(s)) {
3535 do_zz_fp(s, a, fns[a->esz - 1]);
3536 }
3537 return true;
3538}
3539
4d2e2a03
RH
3540/*
3541 *** SVE Floating Point Compare with Zero Group
3542 */
3543
3544static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3545 gen_helper_gvec_3_ptr *fn)
3546{
3547 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3548 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4d2e2a03
RH
3549
3550 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3551 vec_full_reg_offset(s, a->rn),
3552 pred_full_reg_offset(s, a->pg),
3553 status, vsz, vsz, 0, fn);
3554 tcg_temp_free_ptr(status);
3555}
3556
3557#define DO_PPZ(NAME, name) \
3a7be554 3558static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3559{ \
3560 static gen_helper_gvec_3_ptr * const fns[3] = { \
3561 gen_helper_sve_##name##_h, \
3562 gen_helper_sve_##name##_s, \
3563 gen_helper_sve_##name##_d, \
3564 }; \
3565 if (a->esz == 0) { \
3566 return false; \
3567 } \
3568 if (sve_access_check(s)) { \
3569 do_ppz_fp(s, a, fns[a->esz - 1]); \
3570 } \
3571 return true; \
3572}
3573
3574DO_PPZ(FCMGE_ppz0, fcmge0)
3575DO_PPZ(FCMGT_ppz0, fcmgt0)
3576DO_PPZ(FCMLE_ppz0, fcmle0)
3577DO_PPZ(FCMLT_ppz0, fcmlt0)
3578DO_PPZ(FCMEQ_ppz0, fcmeq0)
3579DO_PPZ(FCMNE_ppz0, fcmne0)
3580
3581#undef DO_PPZ
3582
67fcd9ad
RH
3583/*
3584 *** SVE floating-point trig multiply-add coefficient
3585 */
3586
3a7be554 3587static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3588{
3589 static gen_helper_gvec_3_ptr * const fns[3] = {
3590 gen_helper_sve_ftmad_h,
3591 gen_helper_sve_ftmad_s,
3592 gen_helper_sve_ftmad_d,
3593 };
3594
3595 if (a->esz == 0) {
3596 return false;
3597 }
3598 if (sve_access_check(s)) {
3599 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3600 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
67fcd9ad
RH
3601 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3602 vec_full_reg_offset(s, a->rn),
3603 vec_full_reg_offset(s, a->rm),
3604 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3605 tcg_temp_free_ptr(status);
3606 }
3607 return true;
3608}
3609
7f9ddf64
RH
3610/*
3611 *** SVE Floating Point Accumulating Reduction Group
3612 */
3613
3a7be554 3614static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3615{
3616 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3617 TCGv_ptr, TCGv_ptr, TCGv_i32);
3618 static fadda_fn * const fns[3] = {
3619 gen_helper_sve_fadda_h,
3620 gen_helper_sve_fadda_s,
3621 gen_helper_sve_fadda_d,
3622 };
3623 unsigned vsz = vec_full_reg_size(s);
3624 TCGv_ptr t_rm, t_pg, t_fpst;
3625 TCGv_i64 t_val;
3626 TCGv_i32 t_desc;
3627
3628 if (a->esz == 0) {
3629 return false;
3630 }
3631 if (!sve_access_check(s)) {
3632 return true;
3633 }
3634
3635 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3636 t_rm = tcg_temp_new_ptr();
3637 t_pg = tcg_temp_new_ptr();
3638 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3639 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
cdfb22bb 3640 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7f9ddf64
RH
3641 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3642
3643 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3644
3645 tcg_temp_free_i32(t_desc);
3646 tcg_temp_free_ptr(t_fpst);
3647 tcg_temp_free_ptr(t_pg);
3648 tcg_temp_free_ptr(t_rm);
3649
3650 write_fp_dreg(s, a->rd, t_val);
3651 tcg_temp_free_i64(t_val);
3652 return true;
3653}
3654
29b80469
RH
3655/*
3656 *** SVE Floating Point Arithmetic - Unpredicated Group
3657 */
3658
3659static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3660 gen_helper_gvec_3_ptr *fn)
3661{
3662 if (fn == NULL) {
3663 return false;
3664 }
3665 if (sve_access_check(s)) {
3666 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3667 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
29b80469
RH
3668 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3669 vec_full_reg_offset(s, a->rn),
3670 vec_full_reg_offset(s, a->rm),
3671 status, vsz, vsz, 0, fn);
3672 tcg_temp_free_ptr(status);
3673 }
3674 return true;
3675}
3676
3677
3678#define DO_FP3(NAME, name) \
3a7be554 3679static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3680{ \
3681 static gen_helper_gvec_3_ptr * const fns[4] = { \
3682 NULL, gen_helper_gvec_##name##_h, \
3683 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3684 }; \
3685 return do_zzz_fp(s, a, fns[a->esz]); \
3686}
3687
3688DO_FP3(FADD_zzz, fadd)
3689DO_FP3(FSUB_zzz, fsub)
3690DO_FP3(FMUL_zzz, fmul)
3691DO_FP3(FTSMUL, ftsmul)
3692DO_FP3(FRECPS, recps)
3693DO_FP3(FRSQRTS, rsqrts)
3694
3695#undef DO_FP3
3696
ec3b87c2
RH
3697/*
3698 *** SVE Floating Point Arithmetic - Predicated Group
3699 */
3700
3701static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3702 gen_helper_gvec_4_ptr *fn)
3703{
3704 if (fn == NULL) {
3705 return false;
3706 }
3707 if (sve_access_check(s)) {
3708 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3709 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
ec3b87c2
RH
3710 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3711 vec_full_reg_offset(s, a->rn),
3712 vec_full_reg_offset(s, a->rm),
3713 pred_full_reg_offset(s, a->pg),
3714 status, vsz, vsz, 0, fn);
3715 tcg_temp_free_ptr(status);
3716 }
3717 return true;
3718}
3719
3720#define DO_FP3(NAME, name) \
3a7be554 3721static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3722{ \
3723 static gen_helper_gvec_4_ptr * const fns[4] = { \
3724 NULL, gen_helper_sve_##name##_h, \
3725 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3726 }; \
3727 return do_zpzz_fp(s, a, fns[a->esz]); \
3728}
3729
3730DO_FP3(FADD_zpzz, fadd)
3731DO_FP3(FSUB_zpzz, fsub)
3732DO_FP3(FMUL_zpzz, fmul)
3733DO_FP3(FMIN_zpzz, fmin)
3734DO_FP3(FMAX_zpzz, fmax)
3735DO_FP3(FMINNM_zpzz, fminnum)
3736DO_FP3(FMAXNM_zpzz, fmaxnum)
3737DO_FP3(FABD, fabd)
3738DO_FP3(FSCALE, fscalbn)
3739DO_FP3(FDIV, fdiv)
3740DO_FP3(FMULX, fmulx)
3741
3742#undef DO_FP3
8092c6a3 3743
cc48affe
RH
3744typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3745 TCGv_i64, TCGv_ptr, TCGv_i32);
3746
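/* Expand a predicated FP operation between vector Zn and a 64-bit
 * scalar operand via an out-of-line helper, selecting the half- or
 * single/double-precision status pointer as appropriate.  Used by the
 * FP immediate forms below.
 */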
3747static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3748 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3749{
3750 unsigned vsz = vec_full_reg_size(s);
3751 TCGv_ptr t_zd, t_zn, t_pg, status;
3752 TCGv_i32 desc;
3753
3754 t_zd = tcg_temp_new_ptr();
3755 t_zn = tcg_temp_new_ptr();
3756 t_pg = tcg_temp_new_ptr();
3757 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3758 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3759 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3760
cdfb22bb 3761 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
cc48affe
RH
3762 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3763 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3764
3765 tcg_temp_free_i32(desc);
3766 tcg_temp_free_ptr(status);
3767 tcg_temp_free_ptr(t_pg);
3768 tcg_temp_free_ptr(t_zn);
3769 tcg_temp_free_ptr(t_zd);
3770}
3771
3772static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3773 gen_helper_sve_fp2scalar *fn)
3774{
3775 TCGv_i64 temp = tcg_const_i64(imm);
3776 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3777 tcg_temp_free_i64(temp);
3778}
3779
3780#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3781static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3782{ \
3783 static gen_helper_sve_fp2scalar * const fns[3] = { \
3784 gen_helper_sve_##name##_h, \
3785 gen_helper_sve_##name##_s, \
3786 gen_helper_sve_##name##_d \
3787 }; \
3788 static uint64_t const val[3][2] = { \
3789 { float16_##const0, float16_##const1 }, \
3790 { float32_##const0, float32_##const1 }, \
3791 { float64_##const0, float64_##const1 }, \
3792 }; \
3793 if (a->esz == 0) { \
3794 return false; \
3795 } \
3796 if (sve_access_check(s)) { \
3797 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3798 } \
3799 return true; \
3800}
3801
3802#define float16_two make_float16(0x4000)
3803#define float32_two make_float32(0x40000000)
3804#define float64_two make_float64(0x4000000000000000ULL)
3805
3806DO_FP_IMM(FADD, fadds, half, one)
3807DO_FP_IMM(FSUB, fsubs, half, one)
3808DO_FP_IMM(FMUL, fmuls, half, two)
3809DO_FP_IMM(FSUBR, fsubrs, half, one)
3810DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3811DO_FP_IMM(FMINNM, fminnms, zero, one)
3812DO_FP_IMM(FMAX, fmaxs, zero, one)
3813DO_FP_IMM(FMIN, fmins, zero, one)
3814
3815#undef DO_FP_IMM
3816
abfdefd5
RH
3817static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3818 gen_helper_gvec_4_ptr *fn)
3819{
3820 if (fn == NULL) {
3821 return false;
3822 }
3823 if (sve_access_check(s)) {
3824 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3825 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
abfdefd5
RH
3826 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3827 vec_full_reg_offset(s, a->rn),
3828 vec_full_reg_offset(s, a->rm),
3829 pred_full_reg_offset(s, a->pg),
3830 status, vsz, vsz, 0, fn);
3831 tcg_temp_free_ptr(status);
3832 }
3833 return true;
3834}
3835
3836#define DO_FPCMP(NAME, name) \
3a7be554 3837static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3838{ \
3839 static gen_helper_gvec_4_ptr * const fns[4] = { \
3840 NULL, gen_helper_sve_##name##_h, \
3841 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3842 }; \
3843 return do_fp_cmp(s, a, fns[a->esz]); \
3844}
3845
3846DO_FPCMP(FCMGE, fcmge)
3847DO_FPCMP(FCMGT, fcmgt)
3848DO_FPCMP(FCMEQ, fcmeq)
3849DO_FPCMP(FCMNE, fcmne)
3850DO_FPCMP(FCMUO, fcmuo)
3851DO_FPCMP(FACGE, facge)
3852DO_FPCMP(FACGT, facgt)
3853
3854#undef DO_FPCMP
3855
3a7be554 3856static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3857{
3858 static gen_helper_gvec_4_ptr * const fns[3] = {
3859 gen_helper_sve_fcadd_h,
3860 gen_helper_sve_fcadd_s,
3861 gen_helper_sve_fcadd_d
3862 };
3863
3864 if (a->esz == 0) {
3865 return false;
3866 }
3867 if (sve_access_check(s)) {
3868 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3869 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
76a9d9cd
RH
3870 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3871 vec_full_reg_offset(s, a->rn),
3872 vec_full_reg_offset(s, a->rm),
3873 pred_full_reg_offset(s, a->pg),
3874 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3875 tcg_temp_free_ptr(status);
3876 }
3877 return true;
3878}
3879
08975da9
RH
3880static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3881 gen_helper_gvec_5_ptr *fn)
6ceabaad 3882{
08975da9 3883 if (a->esz == 0) {
6ceabaad
RH
3884 return false;
3885 }
08975da9
RH
3886 if (sve_access_check(s)) {
3887 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3888 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3889 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3890 vec_full_reg_offset(s, a->rn),
3891 vec_full_reg_offset(s, a->rm),
3892 vec_full_reg_offset(s, a->ra),
3893 pred_full_reg_offset(s, a->pg),
3894 status, vsz, vsz, 0, fn);
3895 tcg_temp_free_ptr(status);
6ceabaad 3896 }
6ceabaad
RH
3897 return true;
3898}
3899
3900#define DO_FMLA(NAME, name) \
3a7be554 3901static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 3902{ \
08975da9 3903 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
3904 NULL, gen_helper_sve_##name##_h, \
3905 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3906 }; \
3907 return do_fmla(s, a, fns[a->esz]); \
3908}
3909
3910DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3911DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3912DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3913DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3914
3915#undef DO_FMLA
3916
3a7be554 3917static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 3918{
08975da9
RH
3919 static gen_helper_gvec_5_ptr * const fns[4] = {
3920 NULL,
05f48bab
RH
3921 gen_helper_sve_fcmla_zpzzz_h,
3922 gen_helper_sve_fcmla_zpzzz_s,
3923 gen_helper_sve_fcmla_zpzzz_d,
3924 };
3925
3926 if (a->esz == 0) {
3927 return false;
3928 }
3929 if (sve_access_check(s)) {
3930 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3931 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
08975da9
RH
3932 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3933 vec_full_reg_offset(s, a->rn),
3934 vec_full_reg_offset(s, a->rm),
3935 vec_full_reg_offset(s, a->ra),
3936 pred_full_reg_offset(s, a->pg),
3937 status, vsz, vsz, a->rot, fns[a->esz]);
3938 tcg_temp_free_ptr(status);
05f48bab
RH
3939 }
3940 return true;
3941}
3942
3a7be554 3943static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
3944{
3945 static gen_helper_gvec_3_ptr * const fns[2] = {
3946 gen_helper_gvec_fcmlah_idx,
3947 gen_helper_gvec_fcmlas_idx,
3948 };
3949
3950 tcg_debug_assert(a->esz == 1 || a->esz == 2);
3951 tcg_debug_assert(a->rd == a->ra);
3952 if (sve_access_check(s)) {
3953 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3954 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
18fc2405
RH
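        /*
         * Pack index and rotation into simd_data: the low two bits carry
         * the rotation (0/90/180/270 degrees), the bits above the index.
         */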
3955 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3956 vec_full_reg_offset(s, a->rn),
3957 vec_full_reg_offset(s, a->rm),
3958 status, vsz, vsz,
3959 a->index * 4 + a->rot,
3960 fns[a->esz - 1]);
3961 tcg_temp_free_ptr(status);
3962 }
3963 return true;
3964}
3965
8092c6a3
RH
3966/*
3967 *** SVE Floating Point Unary Operations Predicated Group
3968 */
3969
3970static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3971 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3972{
3973 if (sve_access_check(s)) {
3974 unsigned vsz = vec_full_reg_size(s);
cdfb22bb 3975 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
8092c6a3
RH
3976 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3977 vec_full_reg_offset(s, rn),
3978 pred_full_reg_offset(s, pg),
3979 status, vsz, vsz, 0, fn);
3980 tcg_temp_free_ptr(status);
3981 }
3982 return true;
3983}
3984
3a7be554 3985static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3986{
e4ab5124 3987 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
3988}
3989
3a7be554 3990static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
3991{
3992 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
3993}
3994
3a7be554 3995static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 3996{
e4ab5124 3997 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
3998}
3999
3a7be554 4000static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4001{
4002 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4003}
4004
3a7be554 4005static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4006{
4007 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4008}
4009
3a7be554 4010static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4011{
4012 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4013}
4014
3a7be554 4015static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4016{
4017 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4018}
4019
3a7be554 4020static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4021{
4022 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4023}
4024
3a7be554 4025static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4026{
4027 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4028}
4029
3a7be554 4030static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4031{
4032 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4033}
4034
3a7be554 4035static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4036{
4037 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4038}
4039
3a7be554 4040static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4041{
4042 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4043}
4044
3a7be554 4045static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4046{
4047 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4048}
4049
3a7be554 4050static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4051{
4052 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4053}
4054
3a7be554 4055static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4056{
4057 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4058}
4059
3a7be554 4060static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4061{
4062 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4063}
4064
3a7be554 4065static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4066{
4067 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4068}
4069
3a7be554 4070static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4071{
4072 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4073}
4074
3a7be554 4075static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4076{
4077 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4078}
4079
3a7be554 4080static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4081{
4082 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4083}
4084
cda3c753
RH
4085static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4086 gen_helper_sve_frint_h,
4087 gen_helper_sve_frint_s,
4088 gen_helper_sve_frint_d
4089};
4090
3a7be554 4091static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4092{
4093 if (a->esz == 0) {
4094 return false;
4095 }
4096 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4097 frint_fns[a->esz - 1]);
4098}
4099
3a7be554 4100static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4101{
4102 static gen_helper_gvec_3_ptr * const fns[3] = {
4103 gen_helper_sve_frintx_h,
4104 gen_helper_sve_frintx_s,
4105 gen_helper_sve_frintx_d
4106 };
4107 if (a->esz == 0) {
4108 return false;
4109 }
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4111}
4112
4113static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4114{
4115 if (a->esz == 0) {
4116 return false;
4117 }
4118 if (sve_access_check(s)) {
4119 unsigned vsz = vec_full_reg_size(s);
4120 TCGv_i32 tmode = tcg_const_i32(mode);
cdfb22bb 4121 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
cda3c753
RH
4122
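        /*
         * set_rmode returns the previous rounding mode in tmode, so the
         * second call below restores the original mode.
         */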
4123 gen_helper_set_rmode(tmode, tmode, status);
4124
4125 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4126 vec_full_reg_offset(s, a->rn),
4127 pred_full_reg_offset(s, a->pg),
4128 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4129
4130 gen_helper_set_rmode(tmode, tmode, status);
4131 tcg_temp_free_i32(tmode);
4132 tcg_temp_free_ptr(status);
4133 }
4134 return true;
4135}
4136
3a7be554 4137static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4138{
4139 return do_frint_mode(s, a, float_round_nearest_even);
4140}
4141
3a7be554 4142static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4143{
4144 return do_frint_mode(s, a, float_round_up);
4145}
4146
3a7be554 4147static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4148{
4149 return do_frint_mode(s, a, float_round_down);
4150}
4151
3a7be554 4152static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4153{
4154 return do_frint_mode(s, a, float_round_to_zero);
4155}
4156
3a7be554 4157static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4158{
4159 return do_frint_mode(s, a, float_round_ties_away);
4160}
4161
3a7be554 4162static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4163{
4164 static gen_helper_gvec_3_ptr * const fns[3] = {
4165 gen_helper_sve_frecpx_h,
4166 gen_helper_sve_frecpx_s,
4167 gen_helper_sve_frecpx_d
4168 };
4169 if (a->esz == 0) {
4170 return false;
4171 }
4172 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4173}
4174
3a7be554 4175static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4176{
4177 static gen_helper_gvec_3_ptr * const fns[3] = {
4178 gen_helper_sve_fsqrt_h,
4179 gen_helper_sve_fsqrt_s,
4180 gen_helper_sve_fsqrt_d
4181 };
4182 if (a->esz == 0) {
4183 return false;
4184 }
4185 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4186}
4187
3a7be554 4188static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4189{
4190 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4191}
4192
3a7be554 4193static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4194{
4195 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4196}
4197
3a7be554 4198static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4199{
4200 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4201}
4202
3a7be554 4203static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4204{
4205 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4206}
4207
3a7be554 4208static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4209{
4210 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4211}
4212
3a7be554 4213static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4214{
4215 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4216}
4217
3a7be554 4218static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4219{
4220 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4221}
4222
3a7be554 4223static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4224{
4225 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4226}
4227
3a7be554 4228static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4229{
4230 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4231}
4232
3a7be554 4233static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4234{
4235 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4236}
4237
3a7be554 4238static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4239{
4240 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4241}
4242
3a7be554 4243static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4244{
4245 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4246}
4247
3a7be554 4248static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4249{
4250 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4251}
4252
3a7be554 4253static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4254{
4255 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4256}
4257
d1822297
RH
4258/*
4259 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4260 */
4261
4262/* Subroutine loading a vector register at VOFS of LEN bytes.
4263 * The load should begin at the address Rn + IMM.
4264 */
4265
19f2acc9 4266static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4267{
19f2acc9
RH
4268 int len_align = QEMU_ALIGN_DOWN(len, 8);
4269 int len_remain = len % 8;
4270 int nparts = len / 8 + ctpop8(len_remain);
d1822297 4271 int midx = get_mem_index(s);
b2aa8879 4272 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4273
b2aa8879
RH
4274 dirty_addr = tcg_temp_new_i64();
4275 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4276 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4277 tcg_temp_free_i64(dirty_addr);
d1822297 4278
b2aa8879
RH
4279 /*
4280 * Note that unpredicated load/store of vector/predicate registers
d1822297 4281 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4282 * operations on larger quantities.
d1822297
RH
4283 * Attempt to keep code expansion to a minimum by limiting the
4284 * amount of unrolling done.
4285 */
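    /*
     * For example, with a 256-bit vector length a vector register has
     * len = 32, so len_remain = 0 and nparts = 4 (unrolled), while a
     * predicate register has len = 4, so only the remainder path runs.
     */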
4286 if (nparts <= 4) {
4287 int i;
4288
b2aa8879 4289 t0 = tcg_temp_new_i64();
d1822297 4290 for (i = 0; i < len_align; i += 8) {
b2aa8879 4291 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4292 tcg_gen_st_i64(t0, cpu_env, vofs + i);
b2aa8879 4293 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4294 }
b2aa8879 4295 tcg_temp_free_i64(t0);
d1822297
RH
4296 } else {
4297 TCGLabel *loop = gen_new_label();
4298 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4299
b2aa8879
RH
4300 /* Copy the clean address into a local temp, live across the loop. */
4301 t0 = clean_addr;
4b4dc975 4302 clean_addr = new_tmp_a64_local(s);
b2aa8879 4303 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4304
b2aa8879 4305 gen_set_label(loop);
d1822297 4306
b2aa8879
RH
4307 t0 = tcg_temp_new_i64();
4308 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4309 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4310
b2aa8879 4311 tp = tcg_temp_new_ptr();
d1822297
RH
4312 tcg_gen_add_ptr(tp, cpu_env, i);
4313 tcg_gen_addi_ptr(i, i, 8);
4314 tcg_gen_st_i64(t0, tp, vofs);
4315 tcg_temp_free_ptr(tp);
b2aa8879 4316 tcg_temp_free_i64(t0);
d1822297
RH
4317
4318 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4319 tcg_temp_free_ptr(i);
4320 }
4321
b2aa8879
RH
4322 /*
4323 * Predicate register loads can be any multiple of 2.
d1822297
RH
4324 * Note that we still store the entire 64-bit unit into cpu_env.
4325 */
4326 if (len_remain) {
b2aa8879 4327 t0 = tcg_temp_new_i64();
d1822297
RH
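        /*
         * A remainder of 2, 4 or 8 bytes is a single naturally-sized load;
         * 6 bytes takes a 4-byte plus a 2-byte load merged with a deposit.
         */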
4328 switch (len_remain) {
4329 case 2:
4330 case 4:
4331 case 8:
b2aa8879
RH
4332 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4333 MO_LE | ctz32(len_remain));
d1822297
RH
4334 break;
4335
4336 case 6:
4337 t1 = tcg_temp_new_i64();
b2aa8879
RH
4338 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4339 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4340 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4341 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4342 tcg_temp_free_i64(t1);
4343 break;
4344
4345 default:
4346 g_assert_not_reached();
4347 }
4348 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4349 tcg_temp_free_i64(t0);
d1822297 4350 }
d1822297
RH
4351}
4352
5047c204 4353/* Similarly for stores. */
19f2acc9 4354static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4355{
19f2acc9
RH
4356 int len_align = QEMU_ALIGN_DOWN(len, 8);
4357 int len_remain = len % 8;
4358 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4359 int midx = get_mem_index(s);
bba87d0a 4360 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4361
bba87d0a
RH
4362 dirty_addr = tcg_temp_new_i64();
4363 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4364 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4365 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4366
4367 /* Note that unpredicated load/store of vector/predicate registers
4368 * are defined as a stream of bytes, which equates to little-endian
4369 * operations on larger quantities. There is no nice way to force
4370 * a little-endian store for aarch64_be-linux-user out of line.
4371 *
4372 * Attempt to keep code expansion to a minimum by limiting the
4373 * amount of unrolling done.
4374 */
4375 if (nparts <= 4) {
4376 int i;
4377
bba87d0a 4378 t0 = tcg_temp_new_i64();
5047c204
RH
4379 for (i = 0; i < len_align; i += 8) {
4380 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a
RH
4381 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4382 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4383 }
bba87d0a 4384 tcg_temp_free_i64(t0);
5047c204
RH
4385 } else {
4386 TCGLabel *loop = gen_new_label();
bba87d0a 4387 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4388
bba87d0a
RH
4389 /* Copy the clean address into a local temp, live across the loop. */
4390 t0 = clean_addr;
4b4dc975 4391 clean_addr = new_tmp_a64_local(s);
bba87d0a 4392 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4393
bba87d0a 4394 gen_set_label(loop);
5047c204 4395
bba87d0a
RH
4396 t0 = tcg_temp_new_i64();
4397 tp = tcg_temp_new_ptr();
4398 tcg_gen_add_ptr(tp, cpu_env, i);
4399 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4400 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4401 tcg_temp_free_ptr(tp);
4402
4403 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4404 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4405 tcg_temp_free_i64(t0);
5047c204
RH
4406
4407 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4408 tcg_temp_free_ptr(i);
4409 }
4410
4411 /* Predicate register stores can be any multiple of 2. */
4412 if (len_remain) {
bba87d0a 4413 t0 = tcg_temp_new_i64();
5047c204 4414 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4415
4416 switch (len_remain) {
4417 case 2:
4418 case 4:
4419 case 8:
bba87d0a
RH
4420 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4421 MO_LE | ctz32(len_remain));
5047c204
RH
4422 break;
4423
4424 case 6:
bba87d0a
RH
4425 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4426 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4427 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4428 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4429 break;
4430
4431 default:
4432 g_assert_not_reached();
4433 }
bba87d0a 4434 tcg_temp_free_i64(t0);
5047c204 4435 }
5047c204
RH
4436}
4437
3a7be554 4438static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4439{
4440 if (sve_access_check(s)) {
4441 int size = vec_full_reg_size(s);
4442 int off = vec_full_reg_offset(s, a->rd);
4443 do_ldr(s, off, size, a->rn, a->imm * size);
4444 }
4445 return true;
4446}
4447
3a7be554 4448static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4449{
4450 if (sve_access_check(s)) {
4451 int size = pred_full_reg_size(s);
4452 int off = pred_full_reg_offset(s, a->rd);
4453 do_ldr(s, off, size, a->rn, a->imm * size);
4454 }
4455 return true;
4456}
c4e7c493 4457
3a7be554 4458static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4459{
4460 if (sve_access_check(s)) {
4461 int size = vec_full_reg_size(s);
4462 int off = vec_full_reg_offset(s, a->rd);
4463 do_str(s, off, size, a->rn, a->imm * size);
4464 }
4465 return true;
4466}
4467
3a7be554 4468static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4469{
4470 if (sve_access_check(s)) {
4471 int size = pred_full_reg_size(s);
4472 int off = pred_full_reg_offset(s, a->rd);
4473 do_str(s, off, size, a->rn, a->imm * size);
4474 }
4475 return true;
4476}
4477
c4e7c493
RH
4478/*
4479 *** SVE Memory - Contiguous Load Group
4480 */
4481
4482/* The memory mode of the dtype. */
14776ab5 4483static const MemOp dtype_mop[16] = {
c4e7c493
RH
4484 MO_UB, MO_UB, MO_UB, MO_UB,
4485 MO_SL, MO_UW, MO_UW, MO_UW,
4486 MO_SW, MO_SW, MO_UL, MO_UL,
4487 MO_SB, MO_SB, MO_SB, MO_Q
4488};
4489
4490#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4491
4492/* The vector element size of dtype. */
4493static const uint8_t dtype_esz[16] = {
4494 0, 1, 2, 3,
4495 3, 1, 2, 3,
4496 3, 2, 2, 3,
4497 3, 2, 1, 3
4498};
4499
4500static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4501 int dtype, uint32_t mte_n, bool is_write,
4502 gen_helper_gvec_mem *fn)
c4e7c493
RH
4503{
4504 unsigned vsz = vec_full_reg_size(s);
4505 TCGv_ptr t_pg;
500d0484 4506 TCGv_i32 t_desc;
206adacf 4507 int desc = 0;
c4e7c493 4508
206adacf
RH
4509 /*
4510 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4511 * registers as pointers, so encode the regno into the data field.
4512 * For consistency, do this even for LD1.
4513 */
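    /*
     * With MTE active the check happens in the helper, so pack the MTE
     * parameters into the descriptor above SVE_MTEDESC_SHIFT; otherwise
     * strip any tag bits from the address here.
     */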
9473d0ec 4514 if (s->mte_active[0]) {
206adacf
RH
4515 int msz = dtype_msz(dtype);
4516
4517 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4518 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4519 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4520 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4521 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4522 desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4523 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4524 } else {
4525 addr = clean_data_tbi(s, addr);
206adacf 4526 }
9473d0ec 4527
206adacf 4528 desc = simd_desc(vsz, vsz, zt | desc);
500d0484 4529 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4530 t_pg = tcg_temp_new_ptr();
4531
4532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4533 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4534
4535 tcg_temp_free_ptr(t_pg);
500d0484 4536 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4537}
4538
4539static void do_ld_zpa(DisasContext *s, int zt, int pg,
4540 TCGv_i64 addr, int dtype, int nreg)
4541{
206adacf
RH
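    /* Indexed by [mte][be][dtype][nreg]: nreg 0 is LD1, 1-3 are LD2-LD4. */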
4542 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4543 { /* mte inactive, little-endian */
4544 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4545 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4546 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4547 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4548 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4549
4550 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4551 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4552 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4553 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4554 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4555
4556 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4557 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4558 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4559 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4560 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4561
4562 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4564 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4565 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4566 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4567
4568 /* mte inactive, big-endian */
4569 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4570 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4571 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4572 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4573 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4574
4575 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4577 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4578 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4579 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4580
4581 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4582 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4583 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4584 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4585 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4586
4587 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4588 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4589 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4590 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4591 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4592
4593 { /* mte active, little-endian */
4594 { { gen_helper_sve_ld1bb_r_mte,
4595 gen_helper_sve_ld2bb_r_mte,
4596 gen_helper_sve_ld3bb_r_mte,
4597 gen_helper_sve_ld4bb_r_mte },
4598 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4599 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4600 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4601
4602 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4603 { gen_helper_sve_ld1hh_le_r_mte,
4604 gen_helper_sve_ld2hh_le_r_mte,
4605 gen_helper_sve_ld3hh_le_r_mte,
4606 gen_helper_sve_ld4hh_le_r_mte },
4607 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4608 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4609
4610 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4611 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4612 { gen_helper_sve_ld1ss_le_r_mte,
4613 gen_helper_sve_ld2ss_le_r_mte,
4614 gen_helper_sve_ld3ss_le_r_mte,
4615 gen_helper_sve_ld4ss_le_r_mte },
4616 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4617
4618 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4619 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4620 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4621 { gen_helper_sve_ld1dd_le_r_mte,
4622 gen_helper_sve_ld2dd_le_r_mte,
4623 gen_helper_sve_ld3dd_le_r_mte,
4624 gen_helper_sve_ld4dd_le_r_mte } },
4625
4626 /* mte active, big-endian */
4627 { { gen_helper_sve_ld1bb_r_mte,
4628 gen_helper_sve_ld2bb_r_mte,
4629 gen_helper_sve_ld3bb_r_mte,
4630 gen_helper_sve_ld4bb_r_mte },
4631 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4633 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4634
4635 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4636 { gen_helper_sve_ld1hh_be_r_mte,
4637 gen_helper_sve_ld2hh_be_r_mte,
4638 gen_helper_sve_ld3hh_be_r_mte,
4639 gen_helper_sve_ld4hh_be_r_mte },
4640 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4641 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4642
4643 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4644 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4645 { gen_helper_sve_ld1ss_be_r_mte,
4646 gen_helper_sve_ld2ss_be_r_mte,
4647 gen_helper_sve_ld3ss_be_r_mte,
4648 gen_helper_sve_ld4ss_be_r_mte },
4649 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4650
4651 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4652 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4653 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4654 { gen_helper_sve_ld1dd_be_r_mte,
4655 gen_helper_sve_ld2dd_be_r_mte,
4656 gen_helper_sve_ld3dd_be_r_mte,
4657 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4658 };
206adacf
RH
4659 gen_helper_gvec_mem *fn
4660 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4661
206adacf
RH
4662 /*
4663 * While there are holes in the table, they are not
c4e7c493
RH
4664 * accessible via the instruction encoding.
4665 */
4666 assert(fn != NULL);
206adacf 4667 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4668}
4669
3a7be554 4670static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4671{
4672 if (a->rm == 31) {
4673 return false;
4674 }
4675 if (sve_access_check(s)) {
4676 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4677 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4678 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4679 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4680 }
4681 return true;
4682}
4683
3a7be554 4684static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4685{
4686 if (sve_access_check(s)) {
4687 int vsz = vec_full_reg_size(s);
4688 int elements = vsz >> dtype_esz[a->dtype];
4689 TCGv_i64 addr = new_tmp_a64(s);
4690
4691 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4692 (a->imm * elements * (a->nreg + 1))
4693 << dtype_msz(a->dtype));
4694 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4695 }
4696 return true;
4697}
e2654d75 4698
3a7be554 4699static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4700{
aa13f7c3
RH
4701 static gen_helper_gvec_mem * const fns[2][2][16] = {
4702 { /* mte inactive, little-endian */
4703 { gen_helper_sve_ldff1bb_r,
4704 gen_helper_sve_ldff1bhu_r,
4705 gen_helper_sve_ldff1bsu_r,
4706 gen_helper_sve_ldff1bdu_r,
4707
4708 gen_helper_sve_ldff1sds_le_r,
4709 gen_helper_sve_ldff1hh_le_r,
4710 gen_helper_sve_ldff1hsu_le_r,
4711 gen_helper_sve_ldff1hdu_le_r,
4712
4713 gen_helper_sve_ldff1hds_le_r,
4714 gen_helper_sve_ldff1hss_le_r,
4715 gen_helper_sve_ldff1ss_le_r,
4716 gen_helper_sve_ldff1sdu_le_r,
4717
4718 gen_helper_sve_ldff1bds_r,
4719 gen_helper_sve_ldff1bss_r,
4720 gen_helper_sve_ldff1bhs_r,
4721 gen_helper_sve_ldff1dd_le_r },
4722
4723 /* mte inactive, big-endian */
4724 { gen_helper_sve_ldff1bb_r,
4725 gen_helper_sve_ldff1bhu_r,
4726 gen_helper_sve_ldff1bsu_r,
4727 gen_helper_sve_ldff1bdu_r,
4728
4729 gen_helper_sve_ldff1sds_be_r,
4730 gen_helper_sve_ldff1hh_be_r,
4731 gen_helper_sve_ldff1hsu_be_r,
4732 gen_helper_sve_ldff1hdu_be_r,
4733
4734 gen_helper_sve_ldff1hds_be_r,
4735 gen_helper_sve_ldff1hss_be_r,
4736 gen_helper_sve_ldff1ss_be_r,
4737 gen_helper_sve_ldff1sdu_be_r,
4738
4739 gen_helper_sve_ldff1bds_r,
4740 gen_helper_sve_ldff1bss_r,
4741 gen_helper_sve_ldff1bhs_r,
4742 gen_helper_sve_ldff1dd_be_r } },
4743
4744 { /* mte active, little-endian */
4745 { gen_helper_sve_ldff1bb_r_mte,
4746 gen_helper_sve_ldff1bhu_r_mte,
4747 gen_helper_sve_ldff1bsu_r_mte,
4748 gen_helper_sve_ldff1bdu_r_mte,
4749
4750 gen_helper_sve_ldff1sds_le_r_mte,
4751 gen_helper_sve_ldff1hh_le_r_mte,
4752 gen_helper_sve_ldff1hsu_le_r_mte,
4753 gen_helper_sve_ldff1hdu_le_r_mte,
4754
4755 gen_helper_sve_ldff1hds_le_r_mte,
4756 gen_helper_sve_ldff1hss_le_r_mte,
4757 gen_helper_sve_ldff1ss_le_r_mte,
4758 gen_helper_sve_ldff1sdu_le_r_mte,
4759
4760 gen_helper_sve_ldff1bds_r_mte,
4761 gen_helper_sve_ldff1bss_r_mte,
4762 gen_helper_sve_ldff1bhs_r_mte,
4763 gen_helper_sve_ldff1dd_le_r_mte },
4764
4765 /* mte active, big-endian */
4766 { gen_helper_sve_ldff1bb_r_mte,
4767 gen_helper_sve_ldff1bhu_r_mte,
4768 gen_helper_sve_ldff1bsu_r_mte,
4769 gen_helper_sve_ldff1bdu_r_mte,
4770
4771 gen_helper_sve_ldff1sds_be_r_mte,
4772 gen_helper_sve_ldff1hh_be_r_mte,
4773 gen_helper_sve_ldff1hsu_be_r_mte,
4774 gen_helper_sve_ldff1hdu_be_r_mte,
4775
4776 gen_helper_sve_ldff1hds_be_r_mte,
4777 gen_helper_sve_ldff1hss_be_r_mte,
4778 gen_helper_sve_ldff1ss_be_r_mte,
4779 gen_helper_sve_ldff1sdu_be_r_mte,
4780
4781 gen_helper_sve_ldff1bds_r_mte,
4782 gen_helper_sve_ldff1bss_r_mte,
4783 gen_helper_sve_ldff1bhs_r_mte,
4784 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4785 };
4786
4787 if (sve_access_check(s)) {
4788 TCGv_i64 addr = new_tmp_a64(s);
4789 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4790 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4791 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4792 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4793 }
4794 return true;
4795}
4796
3a7be554 4797static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4798{
aa13f7c3
RH
4799 static gen_helper_gvec_mem * const fns[2][2][16] = {
4800 { /* mte inactive, little-endian */
4801 { gen_helper_sve_ldnf1bb_r,
4802 gen_helper_sve_ldnf1bhu_r,
4803 gen_helper_sve_ldnf1bsu_r,
4804 gen_helper_sve_ldnf1bdu_r,
4805
4806 gen_helper_sve_ldnf1sds_le_r,
4807 gen_helper_sve_ldnf1hh_le_r,
4808 gen_helper_sve_ldnf1hsu_le_r,
4809 gen_helper_sve_ldnf1hdu_le_r,
4810
4811 gen_helper_sve_ldnf1hds_le_r,
4812 gen_helper_sve_ldnf1hss_le_r,
4813 gen_helper_sve_ldnf1ss_le_r,
4814 gen_helper_sve_ldnf1sdu_le_r,
4815
4816 gen_helper_sve_ldnf1bds_r,
4817 gen_helper_sve_ldnf1bss_r,
4818 gen_helper_sve_ldnf1bhs_r,
4819 gen_helper_sve_ldnf1dd_le_r },
4820
4821 /* mte inactive, big-endian */
4822 { gen_helper_sve_ldnf1bb_r,
4823 gen_helper_sve_ldnf1bhu_r,
4824 gen_helper_sve_ldnf1bsu_r,
4825 gen_helper_sve_ldnf1bdu_r,
4826
4827 gen_helper_sve_ldnf1sds_be_r,
4828 gen_helper_sve_ldnf1hh_be_r,
4829 gen_helper_sve_ldnf1hsu_be_r,
4830 gen_helper_sve_ldnf1hdu_be_r,
4831
4832 gen_helper_sve_ldnf1hds_be_r,
4833 gen_helper_sve_ldnf1hss_be_r,
4834 gen_helper_sve_ldnf1ss_be_r,
4835 gen_helper_sve_ldnf1sdu_be_r,
4836
4837 gen_helper_sve_ldnf1bds_r,
4838 gen_helper_sve_ldnf1bss_r,
4839 gen_helper_sve_ldnf1bhs_r,
4840 gen_helper_sve_ldnf1dd_be_r } },
4841
4842 { /* mte active, little-endian */
4843 { gen_helper_sve_ldnf1bb_r_mte,
4844 gen_helper_sve_ldnf1bhu_r_mte,
4845 gen_helper_sve_ldnf1bsu_r_mte,
4846 gen_helper_sve_ldnf1bdu_r_mte,
4847
4848 gen_helper_sve_ldnf1sds_le_r_mte,
4849 gen_helper_sve_ldnf1hh_le_r_mte,
4850 gen_helper_sve_ldnf1hsu_le_r_mte,
4851 gen_helper_sve_ldnf1hdu_le_r_mte,
4852
4853 gen_helper_sve_ldnf1hds_le_r_mte,
4854 gen_helper_sve_ldnf1hss_le_r_mte,
4855 gen_helper_sve_ldnf1ss_le_r_mte,
4856 gen_helper_sve_ldnf1sdu_le_r_mte,
4857
4858 gen_helper_sve_ldnf1bds_r_mte,
4859 gen_helper_sve_ldnf1bss_r_mte,
4860 gen_helper_sve_ldnf1bhs_r_mte,
4861 gen_helper_sve_ldnf1dd_le_r_mte },
4862
4863 /* mte active, big-endian */
4864 { gen_helper_sve_ldnf1bb_r_mte,
4865 gen_helper_sve_ldnf1bhu_r_mte,
4866 gen_helper_sve_ldnf1bsu_r_mte,
4867 gen_helper_sve_ldnf1bdu_r_mte,
4868
4869 gen_helper_sve_ldnf1sds_be_r_mte,
4870 gen_helper_sve_ldnf1hh_be_r_mte,
4871 gen_helper_sve_ldnf1hsu_be_r_mte,
4872 gen_helper_sve_ldnf1hdu_be_r_mte,
4873
4874 gen_helper_sve_ldnf1hds_be_r_mte,
4875 gen_helper_sve_ldnf1hss_be_r_mte,
4876 gen_helper_sve_ldnf1ss_be_r_mte,
4877 gen_helper_sve_ldnf1sdu_be_r_mte,
4878
4879 gen_helper_sve_ldnf1bds_r_mte,
4880 gen_helper_sve_ldnf1bss_r_mte,
4881 gen_helper_sve_ldnf1bhs_r_mte,
4882 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4883 };
4884
4885 if (sve_access_check(s)) {
4886 int vsz = vec_full_reg_size(s);
4887 int elements = vsz >> dtype_esz[a->dtype];
4888 int off = (a->imm * elements) << dtype_msz(a->dtype);
4889 TCGv_i64 addr = new_tmp_a64(s);
4890
4891 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4892 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4893 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4894 }
4895 return true;
4896}
1a039c7e 4897
05abe304
RH
4898static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4899{
7d0a57a2
RH
4900 static gen_helper_gvec_mem * const fns[2][4] = {
4901 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4902 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4903 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4904 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
4905 };
4906 unsigned vsz = vec_full_reg_size(s);
4907 TCGv_ptr t_pg;
500d0484
RH
4908 TCGv_i32 t_desc;
4909 int desc, poff;
05abe304
RH
4910
4911 /* Load the first quadword using the normal predicated load helpers. */
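    /* Passing oprsz = maxsz = 16 limits the helper to one quadword. */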
ba080b86 4912 desc = simd_desc(16, 16, zt);
500d0484 4913 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
4914
4915 poff = pred_full_reg_offset(s, pg);
4916 if (vsz > 16) {
4917 /*
4918 * Zero-extend the first 16 bits of the predicate into a temporary.
4919 * This avoids triggering an assert making sure we don't have bits
4920 * set within a predicate beyond VQ, but we have lowered VQ to 1
4921 * for this load operation.
4922 */
4923 TCGv_i64 tmp = tcg_temp_new_i64();
4924#ifdef HOST_WORDS_BIGENDIAN
4925 poff += 6;
4926#endif
4927 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4928
4929 poff = offsetof(CPUARMState, vfp.preg_tmp);
4930 tcg_gen_st_i64(tmp, cpu_env, poff);
4931 tcg_temp_free_i64(tmp);
4932 }
4933
05abe304 4934 t_pg = tcg_temp_new_ptr();
2a99ab2b 4935 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 4936
500d0484 4937 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
4938
4939 tcg_temp_free_ptr(t_pg);
500d0484 4940 tcg_temp_free_i32(t_desc);
05abe304
RH
4941
4942 /* Replicate that first quadword. */
4943 if (vsz > 16) {
4944 unsigned dofs = vec_full_reg_offset(s, zt);
4945 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4946 }
4947}
4948
3a7be554 4949static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4950{
4951 if (a->rm == 31) {
4952 return false;
4953 }
4954 if (sve_access_check(s)) {
4955 int msz = dtype_msz(a->dtype);
4956 TCGv_i64 addr = new_tmp_a64(s);
4957 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4958 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4959 do_ldrq(s, a->rd, a->pg, addr, msz);
4960 }
4961 return true;
4962}
4963
3a7be554 4964static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4965{
4966 if (sve_access_check(s)) {
4967 TCGv_i64 addr = new_tmp_a64(s);
4968 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4969 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4970 }
4971 return true;
4972}
4973
68459864 4974/* Load and broadcast element. */
3a7be554 4975static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 4976{
68459864
RH
4977 unsigned vsz = vec_full_reg_size(s);
4978 unsigned psz = pred_full_reg_size(s);
4979 unsigned esz = dtype_esz[a->dtype];
d0e372b0 4980 unsigned msz = dtype_msz(a->dtype);
c0ed9166 4981 TCGLabel *over;
4ac430e1 4982 TCGv_i64 temp, clean_addr;
68459864 4983
c0ed9166
RH
4984 if (!sve_access_check(s)) {
4985 return true;
4986 }
4987
4988 over = gen_new_label();
4989
68459864
RH
4990 /* If the guarding predicate has no bits set, no load occurs. */
4991 if (psz <= 8) {
4992 /* Reduce the pred_esz_masks value simply to reduce the
4993 * size of the code generated here.
4994 */
4995 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4996 temp = tcg_temp_new_i64();
4997 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4998 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4999 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5000 tcg_temp_free_i64(temp);
5001 } else {
5002 TCGv_i32 t32 = tcg_temp_new_i32();
5003 find_last_active(s, t32, esz, a->pg);
5004 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5005 tcg_temp_free_i32(t32);
5006 }
5007
5008 /* Load the data. */
5009 temp = tcg_temp_new_i64();
d0e372b0 5010 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5011 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5012
5013 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
68459864
RH
5014 s->be_data | dtype_mop[a->dtype]);
5015
5016 /* Broadcast to *all* elements. */
5017 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5018 vsz, vsz, temp);
5019 tcg_temp_free_i64(temp);
5020
5021 /* Zero the inactive elements. */
5022 gen_set_label(over);
60245996 5023 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
68459864
RH
5024}
5025
1a039c7e
RH
5026static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5027 int msz, int esz, int nreg)
5028{
71b9f394
RH
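    /*
     * fn_single is indexed by [mte][be][msz][esz]; fn_multiple by
     * [mte][be][nreg - 1][msz], with nreg 1-3 for ST2-ST4 (see below).
     */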
5029 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5030 { { { gen_helper_sve_st1bb_r,
5031 gen_helper_sve_st1bh_r,
5032 gen_helper_sve_st1bs_r,
5033 gen_helper_sve_st1bd_r },
5034 { NULL,
5035 gen_helper_sve_st1hh_le_r,
5036 gen_helper_sve_st1hs_le_r,
5037 gen_helper_sve_st1hd_le_r },
5038 { NULL, NULL,
5039 gen_helper_sve_st1ss_le_r,
5040 gen_helper_sve_st1sd_le_r },
5041 { NULL, NULL, NULL,
5042 gen_helper_sve_st1dd_le_r } },
5043 { { gen_helper_sve_st1bb_r,
5044 gen_helper_sve_st1bh_r,
5045 gen_helper_sve_st1bs_r,
5046 gen_helper_sve_st1bd_r },
5047 { NULL,
5048 gen_helper_sve_st1hh_be_r,
5049 gen_helper_sve_st1hs_be_r,
5050 gen_helper_sve_st1hd_be_r },
5051 { NULL, NULL,
5052 gen_helper_sve_st1ss_be_r,
5053 gen_helper_sve_st1sd_be_r },
5054 { NULL, NULL, NULL,
5055 gen_helper_sve_st1dd_be_r } } },
5056
5057 { { { gen_helper_sve_st1bb_r_mte,
5058 gen_helper_sve_st1bh_r_mte,
5059 gen_helper_sve_st1bs_r_mte,
5060 gen_helper_sve_st1bd_r_mte },
5061 { NULL,
5062 gen_helper_sve_st1hh_le_r_mte,
5063 gen_helper_sve_st1hs_le_r_mte,
5064 gen_helper_sve_st1hd_le_r_mte },
5065 { NULL, NULL,
5066 gen_helper_sve_st1ss_le_r_mte,
5067 gen_helper_sve_st1sd_le_r_mte },
5068 { NULL, NULL, NULL,
5069 gen_helper_sve_st1dd_le_r_mte } },
5070 { { gen_helper_sve_st1bb_r_mte,
5071 gen_helper_sve_st1bh_r_mte,
5072 gen_helper_sve_st1bs_r_mte,
5073 gen_helper_sve_st1bd_r_mte },
5074 { NULL,
5075 gen_helper_sve_st1hh_be_r_mte,
5076 gen_helper_sve_st1hs_be_r_mte,
5077 gen_helper_sve_st1hd_be_r_mte },
5078 { NULL, NULL,
5079 gen_helper_sve_st1ss_be_r_mte,
5080 gen_helper_sve_st1sd_be_r_mte },
5081 { NULL, NULL, NULL,
5082 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5083 };
71b9f394
RH
5084 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5085 { { { gen_helper_sve_st2bb_r,
5086 gen_helper_sve_st2hh_le_r,
5087 gen_helper_sve_st2ss_le_r,
5088 gen_helper_sve_st2dd_le_r },
5089 { gen_helper_sve_st3bb_r,
5090 gen_helper_sve_st3hh_le_r,
5091 gen_helper_sve_st3ss_le_r,
5092 gen_helper_sve_st3dd_le_r },
5093 { gen_helper_sve_st4bb_r,
5094 gen_helper_sve_st4hh_le_r,
5095 gen_helper_sve_st4ss_le_r,
5096 gen_helper_sve_st4dd_le_r } },
5097 { { gen_helper_sve_st2bb_r,
5098 gen_helper_sve_st2hh_be_r,
5099 gen_helper_sve_st2ss_be_r,
5100 gen_helper_sve_st2dd_be_r },
5101 { gen_helper_sve_st3bb_r,
5102 gen_helper_sve_st3hh_be_r,
5103 gen_helper_sve_st3ss_be_r,
5104 gen_helper_sve_st3dd_be_r },
5105 { gen_helper_sve_st4bb_r,
5106 gen_helper_sve_st4hh_be_r,
5107 gen_helper_sve_st4ss_be_r,
5108 gen_helper_sve_st4dd_be_r } } },
5109 { { { gen_helper_sve_st2bb_r_mte,
5110 gen_helper_sve_st2hh_le_r_mte,
5111 gen_helper_sve_st2ss_le_r_mte,
5112 gen_helper_sve_st2dd_le_r_mte },
5113 { gen_helper_sve_st3bb_r_mte,
5114 gen_helper_sve_st3hh_le_r_mte,
5115 gen_helper_sve_st3ss_le_r_mte,
5116 gen_helper_sve_st3dd_le_r_mte },
5117 { gen_helper_sve_st4bb_r_mte,
5118 gen_helper_sve_st4hh_le_r_mte,
5119 gen_helper_sve_st4ss_le_r_mte,
5120 gen_helper_sve_st4dd_le_r_mte } },
5121 { { gen_helper_sve_st2bb_r_mte,
5122 gen_helper_sve_st2hh_be_r_mte,
5123 gen_helper_sve_st2ss_be_r_mte,
5124 gen_helper_sve_st2dd_be_r_mte },
5125 { gen_helper_sve_st3bb_r_mte,
5126 gen_helper_sve_st3hh_be_r_mte,
5127 gen_helper_sve_st3ss_be_r_mte,
5128 gen_helper_sve_st3dd_be_r_mte },
5129 { gen_helper_sve_st4bb_r_mte,
5130 gen_helper_sve_st4hh_be_r_mte,
5131 gen_helper_sve_st4ss_be_r_mte,
5132 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5133 };
5134 gen_helper_gvec_mem *fn;
28d57f2d 5135 int be = s->be_data == MO_BE;
1a039c7e
RH
5136
5137 if (nreg == 0) {
5138 /* ST1 */
71b9f394
RH
5139 fn = fn_single[s->mte_active[0]][be][msz][esz];
5140 nreg = 1;
1a039c7e
RH
5141 } else {
5142 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5143 assert(msz == esz);
71b9f394 5144 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5145 }
5146 assert(fn != NULL);
71b9f394 5147 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5148}
5149
3a7be554 5150static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5151{
5152 if (a->rm == 31 || a->msz > a->esz) {
5153 return false;
5154 }
5155 if (sve_access_check(s)) {
5156 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5157 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5158 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5159 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5160 }
5161 return true;
5162}
5163
3a7be554 5164static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5165{
5166 if (a->msz > a->esz) {
5167 return false;
5168 }
5169 if (sve_access_check(s)) {
5170 int vsz = vec_full_reg_size(s);
5171 int elements = vsz >> a->esz;
5172 TCGv_i64 addr = new_tmp_a64(s);
5173
5174 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5175 (a->imm * elements * (a->nreg + 1)) << a->msz);
5176 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5177 }
5178 return true;
5179}
f6dbf62a
RH
5180
5181/*
5182 *** SVE gather loads / scatter stores
5183 */
5184
500d0484 5185static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5186 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5187 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5188{
5189 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5190 TCGv_ptr t_zm = tcg_temp_new_ptr();
5191 TCGv_ptr t_pg = tcg_temp_new_ptr();
5192 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5193 TCGv_i32 t_desc;
d28d12f0 5194 int desc = 0;
500d0484 5195
d28d12f0
RH
5196 if (s->mte_active[0]) {
5197 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5198 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5199 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5200 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5201 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5202 desc <<= SVE_MTEDESC_SHIFT;
5203 }
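    /*
     * The index scale occupies the low bits of the simd_data field;
     * any MTE parameters sit above SVE_MTEDESC_SHIFT.
     */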
cdecb3fc 5204 desc = simd_desc(vsz, vsz, desc | scale);
500d0484 5205 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5206
5207 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5208 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5209 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5210 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5211
5212 tcg_temp_free_ptr(t_zt);
5213 tcg_temp_free_ptr(t_zm);
5214 tcg_temp_free_ptr(t_pg);
500d0484 5215 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5216}
5217
d28d12f0
RH
5218/* Indexed by [mte][be][ff][xs][u][msz]. */
5219static gen_helper_gvec_mem_scatter * const
5220gather_load_fn32[2][2][2][2][2][3] = {
5221 { /* MTE Inactive */
5222 { /* Little-endian */
5223 { { { gen_helper_sve_ldbss_zsu,
5224 gen_helper_sve_ldhss_le_zsu,
5225 NULL, },
5226 { gen_helper_sve_ldbsu_zsu,
5227 gen_helper_sve_ldhsu_le_zsu,
5228 gen_helper_sve_ldss_le_zsu, } },
5229 { { gen_helper_sve_ldbss_zss,
5230 gen_helper_sve_ldhss_le_zss,
5231 NULL, },
5232 { gen_helper_sve_ldbsu_zss,
5233 gen_helper_sve_ldhsu_le_zss,
5234 gen_helper_sve_ldss_le_zss, } } },
5235
5236 /* First-fault */
5237 { { { gen_helper_sve_ldffbss_zsu,
5238 gen_helper_sve_ldffhss_le_zsu,
5239 NULL, },
5240 { gen_helper_sve_ldffbsu_zsu,
5241 gen_helper_sve_ldffhsu_le_zsu,
5242 gen_helper_sve_ldffss_le_zsu, } },
5243 { { gen_helper_sve_ldffbss_zss,
5244 gen_helper_sve_ldffhss_le_zss,
5245 NULL, },
5246 { gen_helper_sve_ldffbsu_zss,
5247 gen_helper_sve_ldffhsu_le_zss,
5248 gen_helper_sve_ldffss_le_zss, } } } },
5249
5250 { /* Big-endian */
5251 { { { gen_helper_sve_ldbss_zsu,
5252 gen_helper_sve_ldhss_be_zsu,
5253 NULL, },
5254 { gen_helper_sve_ldbsu_zsu,
5255 gen_helper_sve_ldhsu_be_zsu,
5256 gen_helper_sve_ldss_be_zsu, } },
5257 { { gen_helper_sve_ldbss_zss,
5258 gen_helper_sve_ldhss_be_zss,
5259 NULL, },
5260 { gen_helper_sve_ldbsu_zss,
5261 gen_helper_sve_ldhsu_be_zss,
5262 gen_helper_sve_ldss_be_zss, } } },
5263
5264 /* First-fault */
5265 { { { gen_helper_sve_ldffbss_zsu,
5266 gen_helper_sve_ldffhss_be_zsu,
5267 NULL, },
5268 { gen_helper_sve_ldffbsu_zsu,
5269 gen_helper_sve_ldffhsu_be_zsu,
5270 gen_helper_sve_ldffss_be_zsu, } },
5271 { { gen_helper_sve_ldffbss_zss,
5272 gen_helper_sve_ldffhss_be_zss,
5273 NULL, },
5274 { gen_helper_sve_ldffbsu_zss,
5275 gen_helper_sve_ldffhsu_be_zss,
5276 gen_helper_sve_ldffss_be_zss, } } } } },
5277 { /* MTE Active */
5278 { /* Little-endian */
5279 { { { gen_helper_sve_ldbss_zsu_mte,
5280 gen_helper_sve_ldhss_le_zsu_mte,
5281 NULL, },
5282 { gen_helper_sve_ldbsu_zsu_mte,
5283 gen_helper_sve_ldhsu_le_zsu_mte,
5284 gen_helper_sve_ldss_le_zsu_mte, } },
5285 { { gen_helper_sve_ldbss_zss_mte,
5286 gen_helper_sve_ldhss_le_zss_mte,
5287 NULL, },
5288 { gen_helper_sve_ldbsu_zss_mte,
5289 gen_helper_sve_ldhsu_le_zss_mte,
5290 gen_helper_sve_ldss_le_zss_mte, } } },
5291
5292 /* First-fault */
5293 { { { gen_helper_sve_ldffbss_zsu_mte,
5294 gen_helper_sve_ldffhss_le_zsu_mte,
5295 NULL, },
5296 { gen_helper_sve_ldffbsu_zsu_mte,
5297 gen_helper_sve_ldffhsu_le_zsu_mte,
5298 gen_helper_sve_ldffss_le_zsu_mte, } },
5299 { { gen_helper_sve_ldffbss_zss_mte,
5300 gen_helper_sve_ldffhss_le_zss_mte,
5301 NULL, },
5302 { gen_helper_sve_ldffbsu_zss_mte,
5303 gen_helper_sve_ldffhsu_le_zss_mte,
5304 gen_helper_sve_ldffss_le_zss_mte, } } } },
5305
5306 { /* Big-endian */
5307 { { { gen_helper_sve_ldbss_zsu_mte,
5308 gen_helper_sve_ldhss_be_zsu_mte,
5309 NULL, },
5310 { gen_helper_sve_ldbsu_zsu_mte,
5311 gen_helper_sve_ldhsu_be_zsu_mte,
5312 gen_helper_sve_ldss_be_zsu_mte, } },
5313 { { gen_helper_sve_ldbss_zss_mte,
5314 gen_helper_sve_ldhss_be_zss_mte,
5315 NULL, },
5316 { gen_helper_sve_ldbsu_zss_mte,
5317 gen_helper_sve_ldhsu_be_zss_mte,
5318 gen_helper_sve_ldss_be_zss_mte, } } },
5319
5320 /* First-fault */
5321 { { { gen_helper_sve_ldffbss_zsu_mte,
5322 gen_helper_sve_ldffhss_be_zsu_mte,
5323 NULL, },
5324 { gen_helper_sve_ldffbsu_zsu_mte,
5325 gen_helper_sve_ldffhsu_be_zsu_mte,
5326 gen_helper_sve_ldffss_be_zsu_mte, } },
5327 { { gen_helper_sve_ldffbss_zss_mte,
5328 gen_helper_sve_ldffhss_be_zss_mte,
5329 NULL, },
5330 { gen_helper_sve_ldffbsu_zss_mte,
5331 gen_helper_sve_ldffhsu_be_zss_mte,
5332 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5333};
5334
5335/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5336static gen_helper_gvec_mem_scatter * const
5337gather_load_fn64[2][2][2][3][2][4] = {
5338 { /* MTE Inactive */
5339 { /* Little-endian */
5340 { { { gen_helper_sve_ldbds_zsu,
5341 gen_helper_sve_ldhds_le_zsu,
5342 gen_helper_sve_ldsds_le_zsu,
5343 NULL, },
5344 { gen_helper_sve_ldbdu_zsu,
5345 gen_helper_sve_ldhdu_le_zsu,
5346 gen_helper_sve_ldsdu_le_zsu,
5347 gen_helper_sve_lddd_le_zsu, } },
5348 { { gen_helper_sve_ldbds_zss,
5349 gen_helper_sve_ldhds_le_zss,
5350 gen_helper_sve_ldsds_le_zss,
5351 NULL, },
5352 { gen_helper_sve_ldbdu_zss,
5353 gen_helper_sve_ldhdu_le_zss,
5354 gen_helper_sve_ldsdu_le_zss,
5355 gen_helper_sve_lddd_le_zss, } },
5356 { { gen_helper_sve_ldbds_zd,
5357 gen_helper_sve_ldhds_le_zd,
5358 gen_helper_sve_ldsds_le_zd,
5359 NULL, },
5360 { gen_helper_sve_ldbdu_zd,
5361 gen_helper_sve_ldhdu_le_zd,
5362 gen_helper_sve_ldsdu_le_zd,
5363 gen_helper_sve_lddd_le_zd, } } },
5364
5365 /* First-fault */
5366 { { { gen_helper_sve_ldffbds_zsu,
5367 gen_helper_sve_ldffhds_le_zsu,
5368 gen_helper_sve_ldffsds_le_zsu,
5369 NULL, },
5370 { gen_helper_sve_ldffbdu_zsu,
5371 gen_helper_sve_ldffhdu_le_zsu,
5372 gen_helper_sve_ldffsdu_le_zsu,
5373 gen_helper_sve_ldffdd_le_zsu, } },
5374 { { gen_helper_sve_ldffbds_zss,
5375 gen_helper_sve_ldffhds_le_zss,
5376 gen_helper_sve_ldffsds_le_zss,
5377 NULL, },
5378 { gen_helper_sve_ldffbdu_zss,
5379 gen_helper_sve_ldffhdu_le_zss,
5380 gen_helper_sve_ldffsdu_le_zss,
5381 gen_helper_sve_ldffdd_le_zss, } },
5382 { { gen_helper_sve_ldffbds_zd,
5383 gen_helper_sve_ldffhds_le_zd,
5384 gen_helper_sve_ldffsds_le_zd,
5385 NULL, },
5386 { gen_helper_sve_ldffbdu_zd,
5387 gen_helper_sve_ldffhdu_le_zd,
5388 gen_helper_sve_ldffsdu_le_zd,
5389 gen_helper_sve_ldffdd_le_zd, } } } },
5390 { /* Big-endian */
5391 { { { gen_helper_sve_ldbds_zsu,
5392 gen_helper_sve_ldhds_be_zsu,
5393 gen_helper_sve_ldsds_be_zsu,
5394 NULL, },
5395 { gen_helper_sve_ldbdu_zsu,
5396 gen_helper_sve_ldhdu_be_zsu,
5397 gen_helper_sve_ldsdu_be_zsu,
5398 gen_helper_sve_lddd_be_zsu, } },
5399 { { gen_helper_sve_ldbds_zss,
5400 gen_helper_sve_ldhds_be_zss,
5401 gen_helper_sve_ldsds_be_zss,
5402 NULL, },
5403 { gen_helper_sve_ldbdu_zss,
5404 gen_helper_sve_ldhdu_be_zss,
5405 gen_helper_sve_ldsdu_be_zss,
5406 gen_helper_sve_lddd_be_zss, } },
5407 { { gen_helper_sve_ldbds_zd,
5408 gen_helper_sve_ldhds_be_zd,
5409 gen_helper_sve_ldsds_be_zd,
5410 NULL, },
5411 { gen_helper_sve_ldbdu_zd,
5412 gen_helper_sve_ldhdu_be_zd,
5413 gen_helper_sve_ldsdu_be_zd,
5414 gen_helper_sve_lddd_be_zd, } } },
5415
5416 /* First-fault */
5417 { { { gen_helper_sve_ldffbds_zsu,
5418 gen_helper_sve_ldffhds_be_zsu,
5419 gen_helper_sve_ldffsds_be_zsu,
5420 NULL, },
5421 { gen_helper_sve_ldffbdu_zsu,
5422 gen_helper_sve_ldffhdu_be_zsu,
5423 gen_helper_sve_ldffsdu_be_zsu,
5424 gen_helper_sve_ldffdd_be_zsu, } },
5425 { { gen_helper_sve_ldffbds_zss,
5426 gen_helper_sve_ldffhds_be_zss,
5427 gen_helper_sve_ldffsds_be_zss,
5428 NULL, },
5429 { gen_helper_sve_ldffbdu_zss,
5430 gen_helper_sve_ldffhdu_be_zss,
5431 gen_helper_sve_ldffsdu_be_zss,
5432 gen_helper_sve_ldffdd_be_zss, } },
5433 { { gen_helper_sve_ldffbds_zd,
5434 gen_helper_sve_ldffhds_be_zd,
5435 gen_helper_sve_ldffsds_be_zd,
5436 NULL, },
5437 { gen_helper_sve_ldffbdu_zd,
5438 gen_helper_sve_ldffhdu_be_zd,
5439 gen_helper_sve_ldffsdu_be_zd,
5440 gen_helper_sve_ldffdd_be_zd, } } } } },
5441 { /* MTE Active */
5442 { /* Little-endian */
5443 { { { gen_helper_sve_ldbds_zsu_mte,
5444 gen_helper_sve_ldhds_le_zsu_mte,
5445 gen_helper_sve_ldsds_le_zsu_mte,
5446 NULL, },
5447 { gen_helper_sve_ldbdu_zsu_mte,
5448 gen_helper_sve_ldhdu_le_zsu_mte,
5449 gen_helper_sve_ldsdu_le_zsu_mte,
5450 gen_helper_sve_lddd_le_zsu_mte, } },
5451 { { gen_helper_sve_ldbds_zss_mte,
5452 gen_helper_sve_ldhds_le_zss_mte,
5453 gen_helper_sve_ldsds_le_zss_mte,
5454 NULL, },
5455 { gen_helper_sve_ldbdu_zss_mte,
5456 gen_helper_sve_ldhdu_le_zss_mte,
5457 gen_helper_sve_ldsdu_le_zss_mte,
5458 gen_helper_sve_lddd_le_zss_mte, } },
5459 { { gen_helper_sve_ldbds_zd_mte,
5460 gen_helper_sve_ldhds_le_zd_mte,
5461 gen_helper_sve_ldsds_le_zd_mte,
5462 NULL, },
5463 { gen_helper_sve_ldbdu_zd_mte,
5464 gen_helper_sve_ldhdu_le_zd_mte,
5465 gen_helper_sve_ldsdu_le_zd_mte,
5466 gen_helper_sve_lddd_le_zd_mte, } } },
5467
5468 /* First-fault */
5469 { { { gen_helper_sve_ldffbds_zsu_mte,
5470 gen_helper_sve_ldffhds_le_zsu_mte,
5471 gen_helper_sve_ldffsds_le_zsu_mte,
5472 NULL, },
5473 { gen_helper_sve_ldffbdu_zsu_mte,
5474 gen_helper_sve_ldffhdu_le_zsu_mte,
5475 gen_helper_sve_ldffsdu_le_zsu_mte,
5476 gen_helper_sve_ldffdd_le_zsu_mte, } },
5477 { { gen_helper_sve_ldffbds_zss_mte,
5478 gen_helper_sve_ldffhds_le_zss_mte,
5479 gen_helper_sve_ldffsds_le_zss_mte,
5480 NULL, },
5481 { gen_helper_sve_ldffbdu_zss_mte,
5482 gen_helper_sve_ldffhdu_le_zss_mte,
5483 gen_helper_sve_ldffsdu_le_zss_mte,
5484 gen_helper_sve_ldffdd_le_zss_mte, } },
5485 { { gen_helper_sve_ldffbds_zd_mte,
5486 gen_helper_sve_ldffhds_le_zd_mte,
5487 gen_helper_sve_ldffsds_le_zd_mte,
5488 NULL, },
5489 { gen_helper_sve_ldffbdu_zd_mte,
5490 gen_helper_sve_ldffhdu_le_zd_mte,
5491 gen_helper_sve_ldffsdu_le_zd_mte,
5492 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5493 { /* Big-endian */
5494 { { { gen_helper_sve_ldbds_zsu_mte,
5495 gen_helper_sve_ldhds_be_zsu_mte,
5496 gen_helper_sve_ldsds_be_zsu_mte,
5497 NULL, },
5498 { gen_helper_sve_ldbdu_zsu_mte,
5499 gen_helper_sve_ldhdu_be_zsu_mte,
5500 gen_helper_sve_ldsdu_be_zsu_mte,
5501 gen_helper_sve_lddd_be_zsu_mte, } },
5502 { { gen_helper_sve_ldbds_zss_mte,
5503 gen_helper_sve_ldhds_be_zss_mte,
5504 gen_helper_sve_ldsds_be_zss_mte,
5505 NULL, },
5506 { gen_helper_sve_ldbdu_zss_mte,
5507 gen_helper_sve_ldhdu_be_zss_mte,
5508 gen_helper_sve_ldsdu_be_zss_mte,
5509 gen_helper_sve_lddd_be_zss_mte, } },
5510 { { gen_helper_sve_ldbds_zd_mte,
5511 gen_helper_sve_ldhds_be_zd_mte,
5512 gen_helper_sve_ldsds_be_zd_mte,
5513 NULL, },
5514 { gen_helper_sve_ldbdu_zd_mte,
5515 gen_helper_sve_ldhdu_be_zd_mte,
5516 gen_helper_sve_ldsdu_be_zd_mte,
5517 gen_helper_sve_lddd_be_zd_mte, } } },
5518
5519 /* First-fault */
5520 { { { gen_helper_sve_ldffbds_zsu_mte,
5521 gen_helper_sve_ldffhds_be_zsu_mte,
5522 gen_helper_sve_ldffsds_be_zsu_mte,
5523 NULL, },
5524 { gen_helper_sve_ldffbdu_zsu_mte,
5525 gen_helper_sve_ldffhdu_be_zsu_mte,
5526 gen_helper_sve_ldffsdu_be_zsu_mte,
5527 gen_helper_sve_ldffdd_be_zsu_mte, } },
5528 { { gen_helper_sve_ldffbds_zss_mte,
5529 gen_helper_sve_ldffhds_be_zss_mte,
5530 gen_helper_sve_ldffsds_be_zss_mte,
5531 NULL, },
5532 { gen_helper_sve_ldffbdu_zss_mte,
5533 gen_helper_sve_ldffhdu_be_zss_mte,
5534 gen_helper_sve_ldffsdu_be_zss_mte,
5535 gen_helper_sve_ldffdd_be_zss_mte, } },
5536 { { gen_helper_sve_ldffbds_zd_mte,
5537 gen_helper_sve_ldffhds_be_zd_mte,
5538 gen_helper_sve_ldffsds_be_zd_mte,
5539 NULL, },
5540 { gen_helper_sve_ldffbdu_zd_mte,
5541 gen_helper_sve_ldffhdu_be_zd_mte,
5542 gen_helper_sve_ldffsdu_be_zd_mte,
5543 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5544};
5545
3a7be554 5546static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5547{
5548 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5549 bool be = s->be_data == MO_BE;
5550 bool mte = s->mte_active[0];
673e9fa6
RH
5551
5552 if (!sve_access_check(s)) {
5553 return true;
5554 }
5555
5556 switch (a->esz) {
5557 case MO_32:
d28d12f0 5558 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5559 break;
5560 case MO_64:
d28d12f0 5561 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5562 break;
5563 }
5564 assert(fn != NULL);
5565
5566 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5567 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5568 return true;
5569}
5570
3a7be554 5571static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5572{
5573 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5574 bool be = s->be_data == MO_BE;
5575 bool mte = s->mte_active[0];
673e9fa6
RH
5576 TCGv_i64 imm;
5577
5578 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5579 return false;
5580 }
5581 if (!sve_access_check(s)) {
5582 return true;
5583 }
5584
5585 switch (a->esz) {
5586 case MO_32:
d28d12f0 5587 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5588 break;
5589 case MO_64:
d28d12f0 5590 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5591 break;
5592 }
5593 assert(fn != NULL);
5594
5595 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5596 * by loading the immediate into the scalar parameter.
5597 */
5598 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5599 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
5600 tcg_temp_free_i64(imm);
5601 return true;
5602}
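/*
 * Worked example added for clarity (not in the original source): for
 * LD1D { z0.d }, p0/z, [z5.d, #8] we have msz == MO_64, and
 * a->imm << a->msz converts the element-scaled immediate field back to
 * the byte offset 8.  The call above is then equivalent to the
 * register-plus-vector form with the roles swapped:
 *
 *     do_mem_zpz(s, rd, pg, zm = 5, scale = 0, base = 8, MO_64, false, fn);
 *
 * i.e. the vector register supplies the per-element addresses and the
 * "scalar base" is just the constant offset.
 */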
5603
d28d12f0
RH
5604/* Indexed by [mte][be][xs][msz]. */
5605static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5606 { /* MTE Inactive */
5607 { /* Little-endian */
5608 { gen_helper_sve_stbs_zsu,
5609 gen_helper_sve_sths_le_zsu,
5610 gen_helper_sve_stss_le_zsu, },
5611 { gen_helper_sve_stbs_zss,
5612 gen_helper_sve_sths_le_zss,
5613 gen_helper_sve_stss_le_zss, } },
5614 { /* Big-endian */
5615 { gen_helper_sve_stbs_zsu,
5616 gen_helper_sve_sths_be_zsu,
5617 gen_helper_sve_stss_be_zsu, },
5618 { gen_helper_sve_stbs_zss,
5619 gen_helper_sve_sths_be_zss,
5620 gen_helper_sve_stss_be_zss, } } },
5621 { /* MTE Active */
5622 { /* Little-endian */
5623 { gen_helper_sve_stbs_zsu_mte,
5624 gen_helper_sve_sths_le_zsu_mte,
5625 gen_helper_sve_stss_le_zsu_mte, },
5626 { gen_helper_sve_stbs_zss_mte,
5627 gen_helper_sve_sths_le_zss_mte,
5628 gen_helper_sve_stss_le_zss_mte, } },
5629 { /* Big-endian */
5630 { gen_helper_sve_stbs_zsu_mte,
5631 gen_helper_sve_sths_be_zsu_mte,
5632 gen_helper_sve_stss_be_zsu_mte, },
5633 { gen_helper_sve_stbs_zss_mte,
5634 gen_helper_sve_sths_be_zss_mte,
5635 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5636};
5637
5638/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5639static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5640 { /* MTE Inactive */
5641 { /* Little-endian */
5642 { gen_helper_sve_stbd_zsu,
5643 gen_helper_sve_sthd_le_zsu,
5644 gen_helper_sve_stsd_le_zsu,
5645 gen_helper_sve_stdd_le_zsu, },
5646 { gen_helper_sve_stbd_zss,
5647 gen_helper_sve_sthd_le_zss,
5648 gen_helper_sve_stsd_le_zss,
5649 gen_helper_sve_stdd_le_zss, },
5650 { gen_helper_sve_stbd_zd,
5651 gen_helper_sve_sthd_le_zd,
5652 gen_helper_sve_stsd_le_zd,
5653 gen_helper_sve_stdd_le_zd, } },
5654 { /* Big-endian */
5655 { gen_helper_sve_stbd_zsu,
5656 gen_helper_sve_sthd_be_zsu,
5657 gen_helper_sve_stsd_be_zsu,
5658 gen_helper_sve_stdd_be_zsu, },
5659 { gen_helper_sve_stbd_zss,
5660 gen_helper_sve_sthd_be_zss,
5661 gen_helper_sve_stsd_be_zss,
5662 gen_helper_sve_stdd_be_zss, },
5663 { gen_helper_sve_stbd_zd,
5664 gen_helper_sve_sthd_be_zd,
5665 gen_helper_sve_stsd_be_zd,
5666 gen_helper_sve_stdd_be_zd, } } },
 5667 { /* MTE Active */
5668 { /* Little-endian */
5669 { gen_helper_sve_stbd_zsu_mte,
5670 gen_helper_sve_sthd_le_zsu_mte,
5671 gen_helper_sve_stsd_le_zsu_mte,
5672 gen_helper_sve_stdd_le_zsu_mte, },
5673 { gen_helper_sve_stbd_zss_mte,
5674 gen_helper_sve_sthd_le_zss_mte,
5675 gen_helper_sve_stsd_le_zss_mte,
5676 gen_helper_sve_stdd_le_zss_mte, },
5677 { gen_helper_sve_stbd_zd_mte,
5678 gen_helper_sve_sthd_le_zd_mte,
5679 gen_helper_sve_stsd_le_zd_mte,
5680 gen_helper_sve_stdd_le_zd_mte, } },
5681 { /* Big-endian */
5682 { gen_helper_sve_stbd_zsu_mte,
5683 gen_helper_sve_sthd_be_zsu_mte,
5684 gen_helper_sve_stsd_be_zsu_mte,
5685 gen_helper_sve_stdd_be_zsu_mte, },
5686 { gen_helper_sve_stbd_zss_mte,
5687 gen_helper_sve_sthd_be_zss_mte,
5688 gen_helper_sve_stsd_be_zss_mte,
5689 gen_helper_sve_stdd_be_zss_mte, },
5690 { gen_helper_sve_stbd_zd_mte,
5691 gen_helper_sve_sthd_be_zd_mte,
5692 gen_helper_sve_stsd_be_zd_mte,
5693 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5694};
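/*
 * Informal summary added for clarity (not in the original source): in the
 * fn64 tables the third index selects the offset form -- 0 = zsu (32-bit
 * unsigned offsets), 1 = zss (32-bit signed offsets), 2 = zd (64-bit
 * offsets), matching the "overload xs=2" note above -- and the last index
 * is msz, the log2 of the memory access size (b/h/s/d).
 */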
5695
3a7be554 5696static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5697{
f6dbf62a 5698 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5699 bool be = s->be_data == MO_BE;
5700 bool mte = s->mte_active[0];
f6dbf62a
RH
5701
5702 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5703 return false;
5704 }
5705 if (!sve_access_check(s)) {
5706 return true;
5707 }
5708 switch (a->esz) {
5709 case MO_32:
d28d12f0 5710 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5711 break;
5712 case MO_64:
d28d12f0 5713 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5714 break;
5715 default:
5716 g_assert_not_reached();
5717 }
5718 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5719 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5720 return true;
5721}
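/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): the guard at the top of trans_ST1_zprz rejects encodings where
 * the memory element is wider than the vector element, or where a scaled
 * offset is requested for byte accesses.  Written out separately:
 */
static inline bool example_st1_zprz_args_valid(arg_ST1_zprz *a)
{
    if (a->esz < a->msz) {
        return false;   /* e.g. a doubleword store from .S vector elements */
    }
    if (a->msz == 0 && a->scale) {
        return false;   /* scaled offsets are meaningless for byte accesses */
    }
    return true;
}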
dec6cf6b 5722
3a7be554 5723static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5724{
5725 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5726 bool be = s->be_data == MO_BE;
5727 bool mte = s->mte_active[0];
408ecde9
RH
5728 TCGv_i64 imm;
5729
5730 if (a->esz < a->msz) {
5731 return false;
5732 }
5733 if (!sve_access_check(s)) {
5734 return true;
5735 }
5736
5737 switch (a->esz) {
5738 case MO_32:
d28d12f0 5739 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5740 break;
5741 case MO_64:
d28d12f0 5742 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5743 break;
5744 }
5745 assert(fn != NULL);
5746
5747 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5748 * by loading the immediate into the scalar parameter.
5749 */
5750 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5751 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
5752 tcg_temp_free_i64(imm);
5753 return true;
5754}
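/*
 * Note added for clarity (not in the original source): in the immediate
 * forms the vector register itself provides the full address, so the
 * offset-type index is fixed -- 0 for the 32-bit table and 2 (the
 * overloaded 64-bit-offset slot) for the 64-bit table -- rather than
 * coming from an xs field in the encoding.
 */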
5755
dec6cf6b
RH
5756/*
5757 * Prefetches
5758 */
5759
3a7be554 5760static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5761{
5762 /* Prefetch is a nop within QEMU. */
2f95a3b0 5763 (void)sve_access_check(s);
dec6cf6b
RH
5764 return true;
5765}
5766
3a7be554 5767static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5768{
5769 if (a->rm == 31) {
5770 return false;
5771 }
5772 /* Prefetch is a nop within QEMU. */
2f95a3b0 5773 (void)sve_access_check(s);
dec6cf6b
RH
5774 return true;
5775}
a2103582
RH
5776
5777/*
5778 * Move Prefix
5779 *
5780 * TODO: The implementation so far could handle predicated merging movprfx.
5781 * The helper functions as written take an extra source register to
5782 * use in the operation, but the result is only written when predication
5783 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5784 * to allow the final write back to the destination to be unconditional.
5785 * For predicated zeroing movprfx, we need to rearrange the helpers to
5786 * allow the final write back to zero inactives.
5787 *
5788 * In the meantime, just emit the moves.
5789 */
5790
3a7be554 5791static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
5792{
5793 return do_mov_z(s, a->rd, a->rn);
5794}
5795
3a7be554 5796static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5797{
5798 if (sve_access_check(s)) {
5799 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5800 }
5801 return true;
5802}
5803
3a7be554 5804static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582 5805{
60245996 5806 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
a2103582 5807}
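/*
 * Usage note added for clarity (not in the original source): with the
 * simple implementation above, a sequence such as
 *
 *     movprfx z0.d, p0/z, z1.d
 *     add     z0.d, p0/m, z0.d, z2.d
 *
 * is translated as an ordinary predicated-zeroing move of z1 into z0
 * followed by the predicated add, which should be an architecturally
 * valid (if unfused) way to execute the pair.
 */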