1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg/tcg-op.h"
24#include "tcg/tcg-op-gvec.h"
25#include "tcg/tcg-gvec-desc.h"
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
35#include "fpu/softfloat.h"
36
37
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
49
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
57static int tszimm_esz(DisasContext *s, int x)
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
62
63static int tszimm_shr(DisasContext *s, int x)
64{
65 return (16 << tszimm_esz(s, x)) - x;
66}
67
68/* See e.g. LSL (immediate, predicated). */
69static int tszimm_shl(DisasContext *s, int x)
70{
71 return x - (8 << tszimm_esz(s, x));
72}
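/*
 * Worked example of the tsz:imm3 encoding handled above: for 16-bit
 * elements the top bit of tsz is bit 1, so tszimm_esz() returns 1;
 * the encoded value x then lies in [16, 31], giving a right-shift
 * amount of 32 - x (1..16) from tszimm_shr() and a left-shift amount
 * of x - 16 (0..15) from tszimm_shl().
 */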
73
74static inline int plus1(DisasContext *s, int x)
75{
76 return x + 1;
77}
78
79/* The SH bit is in bit 8. Extract the low 8 and shift. */
80static inline int expand_imm_sh8s(DisasContext *s, int x)
81{
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83}
84
85static inline int expand_imm_sh8u(DisasContext *s, int x)
86{
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88}
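/*
 * For example, x = 0x1ff has the SH bit set, so the low byte 0xff is
 * shifted left by 8: expand_imm_sh8s() returns -256 (sign-extended)
 * while expand_imm_sh8u() returns 0xff00.
 */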
89
90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
93static inline int msz_dtype(DisasContext *s, int msz)
94{
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97}
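/*
 * The table is simply dtype = msz * 5, i.e. (msz << 2) | msz: the dtype
 * encodings in which the memory element size equals the register element
 * size, with unsigned (zero) extension.
 */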
98
99/*
100 * Include the generated decoder.
101 */
102
103#include "decode-sve.inc.c"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
109/* Return the offset into CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
122
123/* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them the same way.
127 *
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
130 */
131static int size_for_gvec(int size)
132{
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
137 }
138}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
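/*
 * For example, a 256-bit vector has a 4-byte predicate, which
 * size_for_gvec() rounds up to 8; a 1408-bit vector has a 22-byte
 * predicate, rounded up to 32.
 */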
144
145/* Invoke a vector expander on two Zregs. */
146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
147 int esz, int rd, int rn)
148{
149 if (sve_access_check(s)) {
150 unsigned vsz = vec_full_reg_size(s);
151 gvec_fn(esz, vec_full_reg_offset(s, rd),
152 vec_full_reg_offset(s, rn), vsz, vsz);
153 }
154 return true;
155}
156
157/* Invoke a vector expander on three Zregs. */
158static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
159 int esz, int rd, int rn, int rm)
160{
161 if (sve_access_check(s)) {
162 unsigned vsz = vec_full_reg_size(s);
163 gvec_fn(esz, vec_full_reg_offset(s, rd),
164 vec_full_reg_offset(s, rn),
165 vec_full_reg_offset(s, rm), vsz, vsz);
166 }
167 return true;
168}
169
170/* Invoke a vector move on two Zregs. */
171static bool do_mov_z(DisasContext *s, int rd, int rn)
172{
173 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
174}
175
176/* Initialize a Zreg with replications of a 64-bit immediate. */
177static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
178{
179 unsigned vsz = vec_full_reg_size(s);
180 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
181}
182
183/* Invoke a vector expander on two Pregs. */
184static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
185 int esz, int rd, int rn)
186{
187 if (sve_access_check(s)) {
188 unsigned psz = pred_gvec_reg_size(s);
189 gvec_fn(esz, pred_full_reg_offset(s, rd),
190 pred_full_reg_offset(s, rn), psz, psz);
191 }
192 return true;
193}
194
195/* Invoke a vector expander on three Pregs. */
196static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
197 int esz, int rd, int rn, int rm)
198{
199 if (sve_access_check(s)) {
200 unsigned psz = pred_gvec_reg_size(s);
201 gvec_fn(esz, pred_full_reg_offset(s, rd),
202 pred_full_reg_offset(s, rn),
203 pred_full_reg_offset(s, rm), psz, psz);
204 }
205 return true;
206}
207
208/* Invoke a vector operation on four Pregs. */
209static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
210 int rd, int rn, int rm, int rg)
211{
212 if (sve_access_check(s)) {
213 unsigned psz = pred_gvec_reg_size(s);
214 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
215 pred_full_reg_offset(s, rn),
216 pred_full_reg_offset(s, rm),
217 pred_full_reg_offset(s, rg),
218 psz, psz, gvec_op);
219 }
220 return true;
221}
222
223/* Invoke a vector move on two Pregs. */
224static bool do_mov_p(DisasContext *s, int rd, int rn)
225{
226 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
227}
228
229/* Set the cpu flags as per a return from an SVE helper. */
230static void do_pred_flags(TCGv_i32 t)
231{
232 tcg_gen_mov_i32(cpu_NF, t);
233 tcg_gen_andi_i32(cpu_ZF, t, 2);
234 tcg_gen_andi_i32(cpu_CF, t, 1);
235 tcg_gen_movi_i32(cpu_VF, 0);
236}
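/*
 * The helper's return value uses QEMU's usual flag-variable layout: the
 * whole value is copied to NF (so its bit 31 is the N flag), bit 1 is
 * copied to ZF (Z reads as set only when ZF ends up zero), bit 0 becomes
 * the C flag, and V is cleared.  Per the ARM PredTest pseudocode, N
 * reports whether the first active element is set, Z whether no active
 * element is set, and C the inverse of the last active element.
 */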
237
238/* Subroutines computing the ARM PredTest pseudofunction. */
239static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240{
241 TCGv_i32 t = tcg_temp_new_i32();
242
243 gen_helper_sve_predtest1(t, d, g);
244 do_pred_flags(t);
245 tcg_temp_free_i32(t);
246}
247
248static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
249{
250 TCGv_ptr dptr = tcg_temp_new_ptr();
251 TCGv_ptr gptr = tcg_temp_new_ptr();
252 TCGv_i32 t;
253
254 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
255 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
256 t = tcg_const_i32(words);
257
258 gen_helper_sve_predtest(t, dptr, gptr, t);
259 tcg_temp_free_ptr(dptr);
260 tcg_temp_free_ptr(gptr);
261
262 do_pred_flags(t);
263 tcg_temp_free_i32(t);
264}
265
266/* For each element size, the bits within a predicate word that are active. */
267const uint64_t pred_esz_masks[4] = {
268 0xffffffffffffffffull, 0x5555555555555555ull,
269 0x1111111111111111ull, 0x0101010101010101ull
270};
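/*
 * That is, for element size esz one bit in every (1 << esz) predicate
 * bits is significant: e.g. MO_16 keeps every other bit (0x5555...),
 * MO_64 only every eighth bit (0x0101...).
 */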
271
272/*
273 *** SVE Logical - Unpredicated Group
274 */
275
276static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
277{
278 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
279}
280
281static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
282{
283 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
284}
285
286static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
287{
288 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
289}
290
291static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
292{
293 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
294}
295
296/*
297 *** SVE Integer Arithmetic - Unpredicated Group
298 */
299
300static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
301{
302 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
303}
304
305static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
306{
307 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
308}
309
310static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
311{
312 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
313}
314
315static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
316{
317 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
318}
319
320static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
321{
322 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
323}
324
325static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
326{
327 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
328}
329
330/*
331 *** SVE Integer Arithmetic - Binary Predicated Group
332 */
333
334static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
335{
336 unsigned vsz = vec_full_reg_size(s);
337 if (fn == NULL) {
338 return false;
339 }
340 if (sve_access_check(s)) {
341 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
342 vec_full_reg_offset(s, a->rn),
343 vec_full_reg_offset(s, a->rm),
344 pred_full_reg_offset(s, a->pg),
345 vsz, vsz, 0, fn);
346 }
347 return true;
348}
349
350/* Select active elements from Zn and inactive elements from Zm,
351 * storing the result in Zd.
352 */
353static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
354{
355 static gen_helper_gvec_4 * const fns[4] = {
356 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
357 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
358 };
359 unsigned vsz = vec_full_reg_size(s);
360 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
361 vec_full_reg_offset(s, rn),
362 vec_full_reg_offset(s, rm),
363 pred_full_reg_offset(s, pg),
364 vsz, vsz, 0, fns[esz]);
365}
366
367#define DO_ZPZZ(NAME, name) \
368static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
369{ \
370 static gen_helper_gvec_4 * const fns[4] = { \
371 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
372 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
373 }; \
374 return do_zpzz_ool(s, a, fns[a->esz]); \
375}
376
377DO_ZPZZ(AND, and)
378DO_ZPZZ(EOR, eor)
379DO_ZPZZ(ORR, orr)
380DO_ZPZZ(BIC, bic)
381
382DO_ZPZZ(ADD, add)
383DO_ZPZZ(SUB, sub)
384
385DO_ZPZZ(SMAX, smax)
386DO_ZPZZ(UMAX, umax)
387DO_ZPZZ(SMIN, smin)
388DO_ZPZZ(UMIN, umin)
389DO_ZPZZ(SABD, sabd)
390DO_ZPZZ(UABD, uabd)
391
392DO_ZPZZ(MUL, mul)
393DO_ZPZZ(SMULH, smulh)
394DO_ZPZZ(UMULH, umulh)
395
396DO_ZPZZ(ASR, asr)
397DO_ZPZZ(LSR, lsr)
398DO_ZPZZ(LSL, lsl)
399
400static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
401{
402 static gen_helper_gvec_4 * const fns[4] = {
403 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
404 };
405 return do_zpzz_ool(s, a, fns[a->esz]);
406}
407
408static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
409{
410 static gen_helper_gvec_4 * const fns[4] = {
411 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
412 };
413 return do_zpzz_ool(s, a, fns[a->esz]);
414}
415
416static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
417{
418 if (sve_access_check(s)) {
419 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
420 }
421 return true;
422}
423
424#undef DO_ZPZZ
425
426/*
427 *** SVE Integer Arithmetic - Unary Predicated Group
428 */
429
430static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
431{
432 if (fn == NULL) {
433 return false;
434 }
435 if (sve_access_check(s)) {
436 unsigned vsz = vec_full_reg_size(s);
437 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
438 vec_full_reg_offset(s, a->rn),
439 pred_full_reg_offset(s, a->pg),
440 vsz, vsz, 0, fn);
441 }
442 return true;
443}
444
445#define DO_ZPZ(NAME, name) \
446static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
447{ \
448 static gen_helper_gvec_3 * const fns[4] = { \
449 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
450 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
451 }; \
452 return do_zpz_ool(s, a, fns[a->esz]); \
453}
454
455DO_ZPZ(CLS, cls)
456DO_ZPZ(CLZ, clz)
457DO_ZPZ(CNT_zpz, cnt_zpz)
458DO_ZPZ(CNOT, cnot)
459DO_ZPZ(NOT_zpz, not_zpz)
460DO_ZPZ(ABS, abs)
461DO_ZPZ(NEG, neg)
462
463static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
464{
465 static gen_helper_gvec_3 * const fns[4] = {
466 NULL,
467 gen_helper_sve_fabs_h,
468 gen_helper_sve_fabs_s,
469 gen_helper_sve_fabs_d
470 };
471 return do_zpz_ool(s, a, fns[a->esz]);
472}
473
474static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
475{
476 static gen_helper_gvec_3 * const fns[4] = {
477 NULL,
478 gen_helper_sve_fneg_h,
479 gen_helper_sve_fneg_s,
480 gen_helper_sve_fneg_d
481 };
482 return do_zpz_ool(s, a, fns[a->esz]);
483}
484
485static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
486{
487 static gen_helper_gvec_3 * const fns[4] = {
488 NULL,
489 gen_helper_sve_sxtb_h,
490 gen_helper_sve_sxtb_s,
491 gen_helper_sve_sxtb_d
492 };
493 return do_zpz_ool(s, a, fns[a->esz]);
494}
495
496static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
497{
498 static gen_helper_gvec_3 * const fns[4] = {
499 NULL,
500 gen_helper_sve_uxtb_h,
501 gen_helper_sve_uxtb_s,
502 gen_helper_sve_uxtb_d
503 };
504 return do_zpz_ool(s, a, fns[a->esz]);
505}
506
507static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
508{
509 static gen_helper_gvec_3 * const fns[4] = {
510 NULL, NULL,
511 gen_helper_sve_sxth_s,
512 gen_helper_sve_sxth_d
513 };
514 return do_zpz_ool(s, a, fns[a->esz]);
515}
516
517static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
518{
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_uxth_s,
522 gen_helper_sve_uxth_d
523 };
524 return do_zpz_ool(s, a, fns[a->esz]);
525}
526
527static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
528{
529 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
530}
531
532static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
533{
534 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
535}
536
537#undef DO_ZPZ
538
539/*
540 *** SVE Integer Reduction Group
541 */
542
543typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
544static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
545 gen_helper_gvec_reduc *fn)
546{
547 unsigned vsz = vec_full_reg_size(s);
548 TCGv_ptr t_zn, t_pg;
549 TCGv_i32 desc;
550 TCGv_i64 temp;
551
552 if (fn == NULL) {
553 return false;
554 }
555 if (!sve_access_check(s)) {
556 return true;
557 }
558
559 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
560 temp = tcg_temp_new_i64();
561 t_zn = tcg_temp_new_ptr();
562 t_pg = tcg_temp_new_ptr();
563
564 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
565 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
566 fn(temp, t_zn, t_pg, desc);
567 tcg_temp_free_ptr(t_zn);
568 tcg_temp_free_ptr(t_pg);
569 tcg_temp_free_i32(desc);
570
571 write_fp_dreg(s, a->rd, temp);
572 tcg_temp_free_i64(temp);
573 return true;
574}
575
576#define DO_VPZ(NAME, name) \
577static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
578{ \
579 static gen_helper_gvec_reduc * const fns[4] = { \
580 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
581 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
582 }; \
583 return do_vpz_ool(s, a, fns[a->esz]); \
584}
585
586DO_VPZ(ORV, orv)
587DO_VPZ(ANDV, andv)
588DO_VPZ(EORV, eorv)
589
590DO_VPZ(UADDV, uaddv)
591DO_VPZ(SMAXV, smaxv)
592DO_VPZ(UMAXV, umaxv)
593DO_VPZ(SMINV, sminv)
594DO_VPZ(UMINV, uminv)
595
596static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
597{
598 static gen_helper_gvec_reduc * const fns[4] = {
599 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
600 gen_helper_sve_saddv_s, NULL
601 };
602 return do_vpz_ool(s, a, fns[a->esz]);
603}
604
605#undef DO_VPZ
606
607/*
608 *** SVE Shift by Immediate - Predicated Group
609 */
610
611/* Store zero into every active element of Zd. We will use this for two
612 * and three-operand predicated instructions for which logic dictates a
613 * zero result.
614 */
615static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
616{
617 static gen_helper_gvec_2 * const fns[4] = {
618 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
619 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
620 };
621 if (sve_access_check(s)) {
622 unsigned vsz = vec_full_reg_size(s);
623 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
624 pred_full_reg_offset(s, pg),
625 vsz, vsz, 0, fns[esz]);
626 }
627 return true;
628}
629
630/* Copy Zn into Zd, storing zeros into inactive elements. */
631static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
632{
633 static gen_helper_gvec_3 * const fns[4] = {
634 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
635 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
636 };
637 unsigned vsz = vec_full_reg_size(s);
638 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
639 vec_full_reg_offset(s, rn),
640 pred_full_reg_offset(s, pg),
641 vsz, vsz, 0, fns[esz]);
642}
643
644static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
645 gen_helper_gvec_3 *fn)
646{
647 if (sve_access_check(s)) {
648 unsigned vsz = vec_full_reg_size(s);
649 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
650 vec_full_reg_offset(s, a->rn),
651 pred_full_reg_offset(s, a->pg),
652 vsz, vsz, a->imm, fn);
653 }
654 return true;
655}
656
657static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
658{
659 static gen_helper_gvec_3 * const fns[4] = {
660 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
661 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
662 };
663 if (a->esz < 0) {
664 /* Invalid tsz encoding -- see tszimm_esz. */
665 return false;
666 }
667 /* Shift by element size is architecturally valid. For
668 arithmetic right-shift, it's the same as by one less. */
669 a->imm = MIN(a->imm, (8 << a->esz) - 1);
670 return do_zpzi_ool(s, a, fns[a->esz]);
671}
672
673static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
674{
675 static gen_helper_gvec_3 * const fns[4] = {
676 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
677 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
678 };
679 if (a->esz < 0) {
680 return false;
681 }
682 /* Shift by element size is architecturally valid.
683 For logical shifts, it is a zeroing operation. */
684 if (a->imm >= (8 << a->esz)) {
685 return do_clr_zp(s, a->rd, a->pg, a->esz);
686 } else {
687 return do_zpzi_ool(s, a, fns[a->esz]);
688 }
689}
690
691static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
692{
693 static gen_helper_gvec_3 * const fns[4] = {
694 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
695 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
696 };
697 if (a->esz < 0) {
698 return false;
699 }
700 /* Shift by element size is architecturally valid.
701 For logical shifts, it is a zeroing operation. */
702 if (a->imm >= (8 << a->esz)) {
703 return do_clr_zp(s, a->rd, a->pg, a->esz);
704 } else {
705 return do_zpzi_ool(s, a, fns[a->esz]);
706 }
707}
708
709static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
710{
711 static gen_helper_gvec_3 * const fns[4] = {
712 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
713 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
714 };
715 if (a->esz < 0) {
716 return false;
717 }
718 /* Shift by element size is architecturally valid. For arithmetic
719 right shift for division, it is a zeroing operation. */
720 if (a->imm >= (8 << a->esz)) {
721 return do_clr_zp(s, a->rd, a->pg, a->esz);
722 } else {
723 return do_zpzi_ool(s, a, fns[a->esz]);
724 }
725}
726
727/*
728 *** SVE Bitwise Shift - Predicated Group
729 */
730
731#define DO_ZPZW(NAME, name) \
732static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
733{ \
734 static gen_helper_gvec_4 * const fns[3] = { \
735 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
736 gen_helper_sve_##name##_zpzw_s, \
737 }; \
738 if (a->esz < 0 || a->esz >= 3) { \
739 return false; \
740 } \
741 return do_zpzz_ool(s, a, fns[a->esz]); \
742}
743
744DO_ZPZW(ASR, asr)
745DO_ZPZW(LSR, lsr)
746DO_ZPZW(LSL, lsl)
747
748#undef DO_ZPZW
749
750/*
751 *** SVE Bitwise Shift - Unpredicated Group
752 */
753
754static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
755 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
756 int64_t, uint32_t, uint32_t))
757{
758 if (a->esz < 0) {
759 /* Invalid tsz encoding -- see tszimm_esz. */
760 return false;
761 }
762 if (sve_access_check(s)) {
763 unsigned vsz = vec_full_reg_size(s);
764 /* Shift by element size is architecturally valid. For
765 arithmetic right-shift, it's the same as by one less.
766 Otherwise it is a zeroing operation. */
767 if (a->imm >= 8 << a->esz) {
768 if (asr) {
769 a->imm = (8 << a->esz) - 1;
770 } else {
771 do_dupi_z(s, a->rd, 0);
772 return true;
773 }
774 }
775 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
776 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
777 }
778 return true;
779}
780
781static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
782{
783 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
784}
785
786static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
787{
788 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
789}
790
791static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
792{
793 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
794}
795
796static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
797{
798 if (fn == NULL) {
799 return false;
800 }
801 if (sve_access_check(s)) {
802 unsigned vsz = vec_full_reg_size(s);
803 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
804 vec_full_reg_offset(s, a->rn),
805 vec_full_reg_offset(s, a->rm),
806 vsz, vsz, 0, fn);
807 }
808 return true;
809}
810
811#define DO_ZZW(NAME, name) \
812static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
813{ \
814 static gen_helper_gvec_3 * const fns[4] = { \
815 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
816 gen_helper_sve_##name##_zzw_s, NULL \
817 }; \
818 return do_zzw_ool(s, a, fns[a->esz]); \
819}
820
821DO_ZZW(ASR, asr)
822DO_ZZW(LSR, lsr)
823DO_ZZW(LSL, lsl)
824
825#undef DO_ZZW
826
827/*
828 *** SVE Integer Multiply-Add Group
829 */
830
831static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
832 gen_helper_gvec_5 *fn)
833{
834 if (sve_access_check(s)) {
835 unsigned vsz = vec_full_reg_size(s);
836 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
837 vec_full_reg_offset(s, a->ra),
838 vec_full_reg_offset(s, a->rn),
839 vec_full_reg_offset(s, a->rm),
840 pred_full_reg_offset(s, a->pg),
841 vsz, vsz, 0, fn);
842 }
843 return true;
844}
845
846#define DO_ZPZZZ(NAME, name) \
847static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
848{ \
849 static gen_helper_gvec_5 * const fns[4] = { \
850 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
851 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
852 }; \
853 return do_zpzzz_ool(s, a, fns[a->esz]); \
854}
855
856DO_ZPZZZ(MLA, mla)
857DO_ZPZZZ(MLS, mls)
858
859#undef DO_ZPZZZ
860
861/*
862 *** SVE Index Generation Group
863 */
864
865static void do_index(DisasContext *s, int esz, int rd,
866 TCGv_i64 start, TCGv_i64 incr)
867{
868 unsigned vsz = vec_full_reg_size(s);
869 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
870 TCGv_ptr t_zd = tcg_temp_new_ptr();
871
872 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
873 if (esz == 3) {
874 gen_helper_sve_index_d(t_zd, start, incr, desc);
875 } else {
876 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
877 static index_fn * const fns[3] = {
878 gen_helper_sve_index_b,
879 gen_helper_sve_index_h,
880 gen_helper_sve_index_s,
881 };
882 TCGv_i32 s32 = tcg_temp_new_i32();
883 TCGv_i32 i32 = tcg_temp_new_i32();
884
885 tcg_gen_extrl_i64_i32(s32, start);
886 tcg_gen_extrl_i64_i32(i32, incr);
887 fns[esz](t_zd, s32, i32, desc);
888
889 tcg_temp_free_i32(s32);
890 tcg_temp_free_i32(i32);
891 }
892 tcg_temp_free_ptr(t_zd);
893 tcg_temp_free_i32(desc);
894}
895
896static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
897{
898 if (sve_access_check(s)) {
899 TCGv_i64 start = tcg_const_i64(a->imm1);
900 TCGv_i64 incr = tcg_const_i64(a->imm2);
901 do_index(s, a->esz, a->rd, start, incr);
902 tcg_temp_free_i64(start);
903 tcg_temp_free_i64(incr);
904 }
905 return true;
906}
907
908static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
909{
910 if (sve_access_check(s)) {
911 TCGv_i64 start = tcg_const_i64(a->imm);
912 TCGv_i64 incr = cpu_reg(s, a->rm);
913 do_index(s, a->esz, a->rd, start, incr);
914 tcg_temp_free_i64(start);
915 }
916 return true;
917}
918
919static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
920{
921 if (sve_access_check(s)) {
922 TCGv_i64 start = cpu_reg(s, a->rn);
923 TCGv_i64 incr = tcg_const_i64(a->imm);
924 do_index(s, a->esz, a->rd, start, incr);
925 tcg_temp_free_i64(incr);
926 }
927 return true;
928}
929
930static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
931{
932 if (sve_access_check(s)) {
933 TCGv_i64 start = cpu_reg(s, a->rn);
934 TCGv_i64 incr = cpu_reg(s, a->rm);
935 do_index(s, a->esz, a->rd, start, incr);
936 }
937 return true;
938}
939
940/*
941 *** SVE Stack Allocation Group
942 */
943
944static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
945{
946 if (sve_access_check(s)) {
947 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
948 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
949 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
950 }
951 return true;
952}
953
954static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
955{
956 if (sve_access_check(s)) {
957 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
958 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
959 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
960 }
961 return true;
962}
963
964static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
965{
966 if (sve_access_check(s)) {
967 TCGv_i64 reg = cpu_reg(s, a->rd);
968 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
969 }
970 return true;
971}
972
973/*
974 *** SVE Compute Vector Address Group
975 */
976
977static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
978{
979 if (sve_access_check(s)) {
980 unsigned vsz = vec_full_reg_size(s);
981 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
982 vec_full_reg_offset(s, a->rn),
983 vec_full_reg_offset(s, a->rm),
984 vsz, vsz, a->imm, fn);
985 }
986 return true;
987}
988
989static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
990{
991 return do_adr(s, a, gen_helper_sve_adr_p32);
992}
993
994static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
995{
996 return do_adr(s, a, gen_helper_sve_adr_p64);
997}
998
999static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000{
1001 return do_adr(s, a, gen_helper_sve_adr_s32);
1002}
1003
1004static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005{
1006 return do_adr(s, a, gen_helper_sve_adr_u32);
1007}
1008
1009/*
1010 *** SVE Integer Misc - Unpredicated Group
1011 */
1012
1013static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014{
1015 static gen_helper_gvec_2 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_fexpa_h,
1018 gen_helper_sve_fexpa_s,
1019 gen_helper_sve_fexpa_d,
1020 };
1021 if (a->esz == 0) {
1022 return false;
1023 }
1024 if (sve_access_check(s)) {
1025 unsigned vsz = vec_full_reg_size(s);
1026 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027 vec_full_reg_offset(s, a->rn),
1028 vsz, vsz, 0, fns[a->esz]);
1029 }
1030 return true;
1031}
1032
1033static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034{
1035 static gen_helper_gvec_3 * const fns[4] = {
1036 NULL,
1037 gen_helper_sve_ftssel_h,
1038 gen_helper_sve_ftssel_s,
1039 gen_helper_sve_ftssel_d,
1040 };
1041 if (a->esz == 0) {
1042 return false;
1043 }
1044 if (sve_access_check(s)) {
1045 unsigned vsz = vec_full_reg_size(s);
1046 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047 vec_full_reg_offset(s, a->rn),
1048 vec_full_reg_offset(s, a->rm),
1049 vsz, vsz, 0, fns[a->esz]);
1050 }
1051 return true;
1052}
1053
1054/*
1055 *** SVE Predicate Logical Operations Group
1056 */
1057
1058static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059 const GVecGen4 *gvec_op)
1060{
1061 if (!sve_access_check(s)) {
1062 return true;
1063 }
1064
1065 unsigned psz = pred_gvec_reg_size(s);
1066 int dofs = pred_full_reg_offset(s, a->rd);
1067 int nofs = pred_full_reg_offset(s, a->rn);
1068 int mofs = pred_full_reg_offset(s, a->rm);
1069 int gofs = pred_full_reg_offset(s, a->pg);
1070
1071 if (psz == 8) {
1072 /* Do the operation and the flags generation in temps. */
1073 TCGv_i64 pd = tcg_temp_new_i64();
1074 TCGv_i64 pn = tcg_temp_new_i64();
1075 TCGv_i64 pm = tcg_temp_new_i64();
1076 TCGv_i64 pg = tcg_temp_new_i64();
1077
1078 tcg_gen_ld_i64(pn, cpu_env, nofs);
1079 tcg_gen_ld_i64(pm, cpu_env, mofs);
1080 tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082 gvec_op->fni8(pd, pn, pm, pg);
1083 tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085 do_predtest1(pd, pg);
1086
1087 tcg_temp_free_i64(pd);
1088 tcg_temp_free_i64(pn);
1089 tcg_temp_free_i64(pm);
1090 tcg_temp_free_i64(pg);
1091 } else {
1092 /* The operation and flags generation is large. The computation
1093 * of the flags depends on the original contents of the guarding
1094 * predicate. If the destination overwrites the guarding predicate,
1095 * then the easiest way to get this right is to save a copy.
1096 */
1097 int tofs = gofs;
1098 if (a->rd == a->pg) {
1099 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101 }
1102
1103 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104 do_predtest(s, dofs, tofs, psz / 8);
1105 }
1106 return true;
1107}
1108
1109static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110{
1111 tcg_gen_and_i64(pd, pn, pm);
1112 tcg_gen_and_i64(pd, pd, pg);
1113}
1114
1115static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116 TCGv_vec pm, TCGv_vec pg)
1117{
1118 tcg_gen_and_vec(vece, pd, pn, pm);
1119 tcg_gen_and_vec(vece, pd, pd, pg);
1120}
1121
1122static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123{
1124 static const GVecGen4 op = {
1125 .fni8 = gen_and_pg_i64,
1126 .fniv = gen_and_pg_vec,
1127 .fno = gen_helper_sve_and_pppp,
1128 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129 };
1130 if (a->s) {
1131 return do_pppp_flags(s, a, &op);
1132 } else if (a->rn == a->rm) {
1133 if (a->pg == a->rn) {
1134 return do_mov_p(s, a->rd, a->rn);
1135 } else {
1136 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137 }
1138 } else if (a->pg == a->rn || a->pg == a->rm) {
1139 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140 } else {
1141 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142 }
1143}
1144
1145static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146{
1147 tcg_gen_andc_i64(pd, pn, pm);
1148 tcg_gen_and_i64(pd, pd, pg);
1149}
1150
1151static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152 TCGv_vec pm, TCGv_vec pg)
1153{
1154 tcg_gen_andc_vec(vece, pd, pn, pm);
1155 tcg_gen_and_vec(vece, pd, pd, pg);
1156}
1157
1158static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159{
1160 static const GVecGen4 op = {
1161 .fni8 = gen_bic_pg_i64,
1162 .fniv = gen_bic_pg_vec,
1163 .fno = gen_helper_sve_bic_pppp,
1164 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165 };
1166 if (a->s) {
1167 return do_pppp_flags(s, a, &op);
1168 } else if (a->pg == a->rn) {
1169 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170 } else {
1171 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172 }
1173}
1174
1175static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176{
1177 tcg_gen_xor_i64(pd, pn, pm);
1178 tcg_gen_and_i64(pd, pd, pg);
1179}
1180
1181static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182 TCGv_vec pm, TCGv_vec pg)
1183{
1184 tcg_gen_xor_vec(vece, pd, pn, pm);
1185 tcg_gen_and_vec(vece, pd, pd, pg);
1186}
1187
1188static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189{
1190 static const GVecGen4 op = {
1191 .fni8 = gen_eor_pg_i64,
1192 .fniv = gen_eor_pg_vec,
1193 .fno = gen_helper_sve_eor_pppp,
1194 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195 };
1196 if (a->s) {
1197 return do_pppp_flags(s, a, &op);
1198 } else {
1199 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200 }
1201}
1202
1203static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204{
1205 tcg_gen_and_i64(pn, pn, pg);
1206 tcg_gen_andc_i64(pm, pm, pg);
1207 tcg_gen_or_i64(pd, pn, pm);
1208}
1209
1210static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211 TCGv_vec pm, TCGv_vec pg)
1212{
1213 tcg_gen_and_vec(vece, pn, pn, pg);
1214 tcg_gen_andc_vec(vece, pm, pm, pg);
1215 tcg_gen_or_vec(vece, pd, pn, pm);
1216}
1217
1218static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219{
1220 static const GVecGen4 op = {
1221 .fni8 = gen_sel_pg_i64,
1222 .fniv = gen_sel_pg_vec,
1223 .fno = gen_helper_sve_sel_pppp,
1224 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225 };
1226 if (a->s) {
1227 return false;
1228 } else {
1229 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230 }
1231}
1232
1233static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234{
1235 tcg_gen_or_i64(pd, pn, pm);
1236 tcg_gen_and_i64(pd, pd, pg);
1237}
1238
1239static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240 TCGv_vec pm, TCGv_vec pg)
1241{
1242 tcg_gen_or_vec(vece, pd, pn, pm);
1243 tcg_gen_and_vec(vece, pd, pd, pg);
1244}
1245
1246static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247{
1248 static const GVecGen4 op = {
1249 .fni8 = gen_orr_pg_i64,
1250 .fniv = gen_orr_pg_vec,
1251 .fno = gen_helper_sve_orr_pppp,
1252 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253 };
1254 if (a->s) {
1255 return do_pppp_flags(s, a, &op);
1256 } else if (a->pg == a->rn && a->rn == a->rm) {
1257 return do_mov_p(s, a->rd, a->rn);
1258 } else {
1259 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260 }
1261}
1262
1263static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264{
1265 tcg_gen_orc_i64(pd, pn, pm);
1266 tcg_gen_and_i64(pd, pd, pg);
1267}
1268
1269static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270 TCGv_vec pm, TCGv_vec pg)
1271{
1272 tcg_gen_orc_vec(vece, pd, pn, pm);
1273 tcg_gen_and_vec(vece, pd, pd, pg);
1274}
1275
1276static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277{
1278 static const GVecGen4 op = {
1279 .fni8 = gen_orn_pg_i64,
1280 .fniv = gen_orn_pg_vec,
1281 .fno = gen_helper_sve_orn_pppp,
1282 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283 };
1284 if (a->s) {
1285 return do_pppp_flags(s, a, &op);
1286 } else {
1287 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288 }
1289}
1290
1291static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292{
1293 tcg_gen_or_i64(pd, pn, pm);
1294 tcg_gen_andc_i64(pd, pg, pd);
1295}
1296
1297static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298 TCGv_vec pm, TCGv_vec pg)
1299{
1300 tcg_gen_or_vec(vece, pd, pn, pm);
1301 tcg_gen_andc_vec(vece, pd, pg, pd);
1302}
1303
1304static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305{
1306 static const GVecGen4 op = {
1307 .fni8 = gen_nor_pg_i64,
1308 .fniv = gen_nor_pg_vec,
1309 .fno = gen_helper_sve_nor_pppp,
1310 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311 };
1312 if (a->s) {
1313 return do_pppp_flags(s, a, &op);
1314 } else {
1315 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316 }
1317}
1318
1319static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320{
1321 tcg_gen_and_i64(pd, pn, pm);
1322 tcg_gen_andc_i64(pd, pg, pd);
1323}
1324
1325static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326 TCGv_vec pm, TCGv_vec pg)
1327{
1328 tcg_gen_and_vec(vece, pd, pn, pm);
1329 tcg_gen_andc_vec(vece, pd, pg, pd);
1330}
1331
1332static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333{
1334 static const GVecGen4 op = {
1335 .fni8 = gen_nand_pg_i64,
1336 .fniv = gen_nand_pg_vec,
1337 .fno = gen_helper_sve_nand_pppp,
1338 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339 };
1340 if (a->s) {
1341 return do_pppp_flags(s, a, &op);
1342 } else {
1343 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344 }
1345}
1346
1347/*
1348 *** SVE Predicate Misc Group
1349 */
1350
1351static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352{
1353 if (sve_access_check(s)) {
1354 int nofs = pred_full_reg_offset(s, a->rn);
1355 int gofs = pred_full_reg_offset(s, a->pg);
1356 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358 if (words == 1) {
1359 TCGv_i64 pn = tcg_temp_new_i64();
1360 TCGv_i64 pg = tcg_temp_new_i64();
1361
1362 tcg_gen_ld_i64(pn, cpu_env, nofs);
1363 tcg_gen_ld_i64(pg, cpu_env, gofs);
1364 do_predtest1(pn, pg);
1365
1366 tcg_temp_free_i64(pn);
1367 tcg_temp_free_i64(pg);
1368 } else {
1369 do_predtest(s, nofs, gofs, words);
1370 }
1371 }
1372 return true;
1373}
1374
1375/* See the ARM pseudocode DecodePredCount. */
1376static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1377{
1378 unsigned elements = fullsz >> esz;
1379 unsigned bound;
1380
1381 switch (pattern) {
1382 case 0x0: /* POW2 */
1383 return pow2floor(elements);
1384 case 0x1: /* VL1 */
1385 case 0x2: /* VL2 */
1386 case 0x3: /* VL3 */
1387 case 0x4: /* VL4 */
1388 case 0x5: /* VL5 */
1389 case 0x6: /* VL6 */
1390 case 0x7: /* VL7 */
1391 case 0x8: /* VL8 */
1392 bound = pattern;
1393 break;
1394 case 0x9: /* VL16 */
1395 case 0xa: /* VL32 */
1396 case 0xb: /* VL64 */
1397 case 0xc: /* VL128 */
1398 case 0xd: /* VL256 */
1399 bound = 16 << (pattern - 9);
1400 break;
1401 case 0x1d: /* MUL4 */
1402 return elements - elements % 4;
1403 case 0x1e: /* MUL3 */
1404 return elements - elements % 3;
1405 case 0x1f: /* ALL */
1406 return elements;
1407 default: /* #uimm5 */
1408 return 0;
1409 }
1410 return elements >= bound ? bound : 0;
1411}
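/*
 * Worked example: with a 512-bit vector and esz = MO_32 there are 16
 * elements, so VL7 gives 7, VL16 gives 16, VL32 gives 0 (the bound
 * exceeds the element count), POW2 gives 16, and MUL3 gives 15.
 */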
1412
1413/* This handles all of the predicate initialization instructions,
1414 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1415 * so that decode_pred_count returns 0. For SETFFR, we will have
1416 * set RD == 16 == FFR.
1417 */
1418static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1419{
1420 if (!sve_access_check(s)) {
1421 return true;
1422 }
1423
1424 unsigned fullsz = vec_full_reg_size(s);
1425 unsigned ofs = pred_full_reg_offset(s, rd);
1426 unsigned numelem, setsz, i;
1427 uint64_t word, lastword;
1428 TCGv_i64 t;
1429
1430 numelem = decode_pred_count(fullsz, pat, esz);
1431
1432 /* Determine what we must store into each bit, and how many. */
1433 if (numelem == 0) {
1434 lastword = word = 0;
1435 setsz = fullsz;
1436 } else {
1437 setsz = numelem << esz;
1438 lastword = word = pred_esz_masks[esz];
1439 if (setsz % 64) {
1440 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1441 }
1442 }
1443
1444 t = tcg_temp_new_i64();
1445 if (fullsz <= 64) {
1446 tcg_gen_movi_i64(t, lastword);
1447 tcg_gen_st_i64(t, cpu_env, ofs);
1448 goto done;
1449 }
1450
1451 if (word == lastword) {
1452 unsigned maxsz = size_for_gvec(fullsz / 8);
1453 unsigned oprsz = size_for_gvec(setsz / 8);
1454
1455 if (oprsz * 8 == setsz) {
1456 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1457 goto done;
1458 }
1459 }
1460
1461 setsz /= 8;
1462 fullsz /= 8;
1463
1464 tcg_gen_movi_i64(t, word);
1465 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1466 tcg_gen_st_i64(t, cpu_env, ofs + i);
1467 }
1468 if (lastword != word) {
1469 tcg_gen_movi_i64(t, lastword);
1470 tcg_gen_st_i64(t, cpu_env, ofs + i);
1471 i += 8;
1472 }
1473 if (i < fullsz) {
1474 tcg_gen_movi_i64(t, 0);
1475 for (; i < fullsz; i += 8) {
1476 tcg_gen_st_i64(t, cpu_env, ofs + i);
1477 }
1478 }
1479
1480 done:
1481 tcg_temp_free_i64(t);
1482
1483 /* PTRUES */
1484 if (setflag) {
1485 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1486 tcg_gen_movi_i32(cpu_CF, word == 0);
1487 tcg_gen_movi_i32(cpu_VF, 0);
1488 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1489 }
1490 return true;
1491}
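/*
 * Worked example: with a 512-bit vector (64 predicate bits), esz = MO_32
 * and pattern VL7, numelem is 7, setsz is 28 bits, and lastword is
 * pred_esz_masks[2] masked down to 28 bits, i.e. 0x1111111; the whole
 * predicate fits in one word, so the fullsz <= 64 path stores just that
 * value.
 */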
1492
1493static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494{
1495 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496}
1497
1498static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499{
1500 /* Note pat == 31 is #all, to set all elements. */
1501 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502}
1503
1504static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505{
1506 /* Note pat == 32 is #unimp, to set no elements. */
1507 return do_predset(s, 0, a->rd, 32, false);
1508}
1509
1510static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511{
1512 /* The path through do_pppp_flags is complicated enough to want to avoid
1513 * duplication. Frob the arguments into the form of a predicated AND.
1514 */
1515 arg_rprr_s alt_a = {
1516 .rd = a->rd, .pg = a->pg, .s = a->s,
1517 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518 };
1519 return trans_AND_pppp(s, &alt_a);
1520}
1521
1522static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523{
1524 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525}
1526
1527static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528{
1529 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530}
1531
1532static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534 TCGv_ptr, TCGv_i32))
1535{
1536 if (!sve_access_check(s)) {
1537 return true;
1538 }
1539
1540 TCGv_ptr t_pd = tcg_temp_new_ptr();
1541 TCGv_ptr t_pg = tcg_temp_new_ptr();
1542 TCGv_i32 t;
1543 unsigned desc;
1544
1545 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550 t = tcg_const_i32(desc);
1551
1552 gen_fn(t, t_pd, t_pg, t);
1553 tcg_temp_free_ptr(t_pd);
1554 tcg_temp_free_ptr(t_pg);
1555
1556 do_pred_flags(t);
1557 tcg_temp_free_i32(t);
1558 return true;
1559}
1560
1561static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562{
1563 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564}
1565
1566static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567{
1568 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569}
1570
1571/*
1572 *** SVE Element Count Group
1573 */
1574
1575/* Perform an inline saturating addition of a 32-bit value within
1576 * a 64-bit register. The second operand is known to be positive,
1577 * which halves the comparisons we must perform to bound the result.
1578 */
1579static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580{
1581 int64_t ibound;
1582 TCGv_i64 bound;
1583 TCGCond cond;
1584
1585 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1586 if (u) {
1587 tcg_gen_ext32u_i64(reg, reg);
1588 } else {
1589 tcg_gen_ext32s_i64(reg, reg);
1590 }
1591 if (d) {
1592 tcg_gen_sub_i64(reg, reg, val);
1593 ibound = (u ? 0 : INT32_MIN);
1594 cond = TCG_COND_LT;
1595 } else {
1596 tcg_gen_add_i64(reg, reg, val);
1597 ibound = (u ? UINT32_MAX : INT32_MAX);
1598 cond = TCG_COND_GT;
1599 }
1600 bound = tcg_const_i64(ibound);
1601 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602 tcg_temp_free_i64(bound);
1603}
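/*
 * For example, an unsigned decrement of 7 applied to a register holding
 * 5 produces -2 in the 64-bit intermediate; the movcond above sees it is
 * below the bound 0 and clamps the result to 0.
 */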
1604
1605/* Similarly with 64-bit values. */
1606static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1607{
1608 TCGv_i64 t0 = tcg_temp_new_i64();
1609 TCGv_i64 t1 = tcg_temp_new_i64();
1610 TCGv_i64 t2;
1611
1612 if (u) {
1613 if (d) {
1614 tcg_gen_sub_i64(t0, reg, val);
1615 tcg_gen_movi_i64(t1, 0);
1616 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1617 } else {
1618 tcg_gen_add_i64(t0, reg, val);
1619 tcg_gen_movi_i64(t1, -1);
1620 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1621 }
1622 } else {
1623 if (d) {
1624 /* Detect signed overflow for subtraction. */
1625 tcg_gen_xor_i64(t0, reg, val);
1626 tcg_gen_sub_i64(t1, reg, val);
1627 tcg_gen_xor_i64(reg, reg, t1);
1628 tcg_gen_and_i64(t0, t0, reg);
1629
1630 /* Bound the result. */
1631 tcg_gen_movi_i64(reg, INT64_MIN);
1632 t2 = tcg_const_i64(0);
1633 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1634 } else {
1635 /* Detect signed overflow for addition. */
1636 tcg_gen_xor_i64(t0, reg, val);
1637 tcg_gen_add_i64(reg, reg, val);
1638 tcg_gen_xor_i64(t1, reg, val);
1639 tcg_gen_andc_i64(t0, t1, t0);
1640
1641 /* Bound the result. */
1642 tcg_gen_movi_i64(t1, INT64_MAX);
1643 t2 = tcg_const_i64(0);
1644 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1645 }
1646 tcg_temp_free_i64(t2);
1647 }
1648 tcg_temp_free_i64(t0);
1649 tcg_temp_free_i64(t1);
1650}
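/*
 * The signed cases use the usual overflow identities: a - b overflows
 * iff (a ^ b) & (a ^ (a - b)) has the sign bit set, and a + b overflows
 * iff ((a + b) ^ b) & ~(a ^ b) does; the final movcond then clamps to
 * INT64_MIN or INT64_MAX respectively.
 */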
1651
1652/* Similarly with a vector and a scalar operand. */
1653static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654 TCGv_i64 val, bool u, bool d)
1655{
1656 unsigned vsz = vec_full_reg_size(s);
1657 TCGv_ptr dptr, nptr;
1658 TCGv_i32 t32, desc;
1659 TCGv_i64 t64;
1660
1661 dptr = tcg_temp_new_ptr();
1662 nptr = tcg_temp_new_ptr();
1663 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667 switch (esz) {
1668 case MO_8:
1669 t32 = tcg_temp_new_i32();
1670 tcg_gen_extrl_i64_i32(t32, val);
1671 if (d) {
1672 tcg_gen_neg_i32(t32, t32);
1673 }
1674 if (u) {
1675 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676 } else {
1677 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678 }
1679 tcg_temp_free_i32(t32);
1680 break;
1681
1682 case MO_16:
1683 t32 = tcg_temp_new_i32();
1684 tcg_gen_extrl_i64_i32(t32, val);
1685 if (d) {
1686 tcg_gen_neg_i32(t32, t32);
1687 }
1688 if (u) {
1689 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690 } else {
1691 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692 }
1693 tcg_temp_free_i32(t32);
1694 break;
1695
1696 case MO_32:
1697 t64 = tcg_temp_new_i64();
1698 if (d) {
1699 tcg_gen_neg_i64(t64, val);
1700 } else {
1701 tcg_gen_mov_i64(t64, val);
1702 }
1703 if (u) {
1704 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705 } else {
1706 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707 }
1708 tcg_temp_free_i64(t64);
1709 break;
1710
1711 case MO_64:
1712 if (u) {
1713 if (d) {
1714 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715 } else {
1716 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717 }
1718 } else if (d) {
1719 t64 = tcg_temp_new_i64();
1720 tcg_gen_neg_i64(t64, val);
1721 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722 tcg_temp_free_i64(t64);
1723 } else {
1724 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725 }
1726 break;
1727
1728 default:
1729 g_assert_not_reached();
1730 }
1731
1732 tcg_temp_free_ptr(dptr);
1733 tcg_temp_free_ptr(nptr);
1734 tcg_temp_free_i32(desc);
1735}
1736
1737static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738{
1739 if (sve_access_check(s)) {
1740 unsigned fullsz = vec_full_reg_size(s);
1741 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743 }
1744 return true;
1745}
1746
1747static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748{
1749 if (sve_access_check(s)) {
1750 unsigned fullsz = vec_full_reg_size(s);
1751 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752 int inc = numelem * a->imm * (a->d ? -1 : 1);
1753 TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755 tcg_gen_addi_i64(reg, reg, inc);
1756 }
1757 return true;
1758}
1759
1760static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761{
1762 if (!sve_access_check(s)) {
1763 return true;
1764 }
1765
1766 unsigned fullsz = vec_full_reg_size(s);
1767 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768 int inc = numelem * a->imm;
1769 TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1772 if (inc == 0) {
1773 if (a->u) {
1774 tcg_gen_ext32u_i64(reg, reg);
1775 } else {
1776 tcg_gen_ext32s_i64(reg, reg);
1777 }
1778 } else {
1779 TCGv_i64 t = tcg_const_i64(inc);
1780 do_sat_addsub_32(reg, t, a->u, a->d);
1781 tcg_temp_free_i64(t);
1782 }
1783 return true;
1784}
1785
1786static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787{
1788 if (!sve_access_check(s)) {
1789 return true;
1790 }
1791
1792 unsigned fullsz = vec_full_reg_size(s);
1793 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794 int inc = numelem * a->imm;
1795 TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797 if (inc != 0) {
1798 TCGv_i64 t = tcg_const_i64(inc);
1799 do_sat_addsub_64(reg, t, a->u, a->d);
1800 tcg_temp_free_i64(t);
1801 }
1802 return true;
1803}
1804
1805static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806{
1807 if (a->esz == 0) {
1808 return false;
1809 }
1810
1811 unsigned fullsz = vec_full_reg_size(s);
1812 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813 int inc = numelem * a->imm;
1814
1815 if (inc != 0) {
1816 if (sve_access_check(s)) {
1817 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819 vec_full_reg_offset(s, a->rn),
1820 t, fullsz, fullsz);
1821 tcg_temp_free_i64(t);
1822 }
1823 } else {
1824 do_mov_z(s, a->rd, a->rn);
1825 }
1826 return true;
1827}
1828
1829static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830{
1831 if (a->esz == 0) {
1832 return false;
1833 }
1834
1835 unsigned fullsz = vec_full_reg_size(s);
1836 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837 int inc = numelem * a->imm;
1838
1839 if (inc != 0) {
1840 if (sve_access_check(s)) {
1841 TCGv_i64 t = tcg_const_i64(inc);
1842 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843 tcg_temp_free_i64(t);
1844 }
1845 } else {
1846 do_mov_z(s, a->rd, a->rn);
1847 }
1848 return true;
1849}
1850
1851/*
1852 *** SVE Bitwise Immediate Group
1853 */
1854
1855static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856{
1857 uint64_t imm;
1858 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859 extract32(a->dbm, 0, 6),
1860 extract32(a->dbm, 6, 6))) {
1861 return false;
1862 }
1863 if (sve_access_check(s)) {
1864 unsigned vsz = vec_full_reg_size(s);
1865 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867 }
1868 return true;
1869}
1870
1871static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872{
1873 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874}
1875
1876static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877{
1878 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879}
1880
1881static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882{
1883 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884}
1885
1886static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887{
1888 uint64_t imm;
1889 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890 extract32(a->dbm, 0, 6),
1891 extract32(a->dbm, 6, 6))) {
1892 return false;
1893 }
1894 if (sve_access_check(s)) {
1895 do_dupi_z(s, a->rd, imm);
1896 }
1897 return true;
1898}
1899
1900/*
1901 *** SVE Integer Wide Immediate - Predicated Group
1902 */
1903
1904/* Implement all merging copies. This is used for CPY (immediate),
1905 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906 */
1907static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908 TCGv_i64 val)
1909{
1910 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911 static gen_cpy * const fns[4] = {
1912 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914 };
1915 unsigned vsz = vec_full_reg_size(s);
1916 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917 TCGv_ptr t_zd = tcg_temp_new_ptr();
1918 TCGv_ptr t_zn = tcg_temp_new_ptr();
1919 TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925 fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927 tcg_temp_free_ptr(t_zd);
1928 tcg_temp_free_ptr(t_zn);
1929 tcg_temp_free_ptr(t_pg);
1930 tcg_temp_free_i32(desc);
1931}
1932
1933static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934{
1935 if (a->esz == 0) {
1936 return false;
1937 }
1938 if (sve_access_check(s)) {
1939 /* Decode the VFP immediate. */
1940 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941 TCGv_i64 t_imm = tcg_const_i64(imm);
1942 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943 tcg_temp_free_i64(t_imm);
1944 }
1945 return true;
1946}
1947
1948static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949{
1950 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951 return false;
1952 }
1953 if (sve_access_check(s)) {
1954 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956 tcg_temp_free_i64(t_imm);
1957 }
1958 return true;
1959}
1960
1961static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962{
1963 static gen_helper_gvec_2i * const fns[4] = {
1964 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966 };
1967
1968 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969 return false;
1970 }
1971 if (sve_access_check(s)) {
1972 unsigned vsz = vec_full_reg_size(s);
1973 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975 pred_full_reg_offset(s, a->pg),
1976 t_imm, vsz, vsz, 0, fns[a->esz]);
1977 tcg_temp_free_i64(t_imm);
1978 }
1979 return true;
1980}
1981
b94f8f60
RH
1982/*
1983 *** SVE Permute Extract Group
1984 */
1985
3a7be554 1986static bool trans_EXT(DisasContext *s, arg_EXT *a)
b94f8f60
RH
1987{
1988 if (!sve_access_check(s)) {
1989 return true;
1990 }
1991
1992 unsigned vsz = vec_full_reg_size(s);
1993 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994 unsigned n_siz = vsz - n_ofs;
1995 unsigned d = vec_full_reg_offset(s, a->rd);
1996 unsigned n = vec_full_reg_offset(s, a->rn);
1997 unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999 /* Use host vector move insns if we have appropriate sizes
2000 * and no unfortunate overlap.
2001 */
2002 if (m != d
2003 && n_ofs == size_for_gvec(n_ofs)
2004 && n_siz == size_for_gvec(n_siz)
2005 && (d != n || n_siz <= n_ofs)) {
2006 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007 if (n_ofs != 0) {
2008 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009 }
2010 } else {
2011 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012 }
2013 return true;
2014}
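
/*
 * Worked example (added for illustration): with a 32-byte vector and
 * a->imm == 5, n_ofs == 5 and n_siz == 27, so the result is bytes 5..31
 * of Zn followed by bytes 0..4 of Zm.  Since an offset of 5 does not
 * match size_for_gvec(5), that particular case would take the
 * out-of-line gen_helper_sve_ext path rather than the two
 * tcg_gen_gvec_mov calls.
 */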
2015
30562ab7
RH
2016/*
2017 *** SVE Permute - Unpredicated Group
2018 */
2019
3a7be554 2020static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
30562ab7
RH
2021{
2022 if (sve_access_check(s)) {
2023 unsigned vsz = vec_full_reg_size(s);
2024 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025 vsz, vsz, cpu_reg_sp(s, a->rn));
2026 }
2027 return true;
2028}
2029
3a7be554 2030static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
30562ab7
RH
2031{
2032 if ((a->imm & 0x1f) == 0) {
2033 return false;
2034 }
2035 if (sve_access_check(s)) {
2036 unsigned vsz = vec_full_reg_size(s);
2037 unsigned dofs = vec_full_reg_offset(s, a->rd);
2038 unsigned esz, index;
2039
2040 esz = ctz32(a->imm);
2041 index = a->imm >> (esz + 1);
2042
2043 if ((index << esz) < vsz) {
2044 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046 } else {
7e17d50e
RH
2047 /*
2048 * While dup_mem handles 128-bit elements, dup_imm does not.
2049 * Thankfully element size doesn't matter for splatting zero.
2050 */
2051 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
30562ab7
RH
2052 }
2053 }
2054 return true;
2055}
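
/*
 * Decode example (added commentary): in a->imm the position of the
 * lowest set bit selects the element size and the bits above it give
 * the index.  E.g. imm == 0b100010 gives ctz32 == 1, hence esz == MO_16
 * and index == 0b100010 >> 2 == 8, i.e. "duplicate halfword element 8
 * of Zn".  When (index << esz) is not below the current vector length,
 * the dup_imm(0) branch above produces the all-zero result instead.
 */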
2056
2057static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058{
2059 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060 static gen_insr * const fns[4] = {
2061 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063 };
2064 unsigned vsz = vec_full_reg_size(s);
2065 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066 TCGv_ptr t_zd = tcg_temp_new_ptr();
2067 TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072 fns[a->esz](t_zd, t_zn, val, desc);
2073
2074 tcg_temp_free_ptr(t_zd);
2075 tcg_temp_free_ptr(t_zn);
2076 tcg_temp_free_i32(desc);
2077}
2078
3a7be554 2079static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2080{
2081 if (sve_access_check(s)) {
2082 TCGv_i64 t = tcg_temp_new_i64();
2083 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084 do_insr_i64(s, a, t);
2085 tcg_temp_free_i64(t);
2086 }
2087 return true;
2088}
2089
3a7be554 2090static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2091{
2092 if (sve_access_check(s)) {
2093 do_insr_i64(s, a, cpu_reg(s, a->rm));
2094 }
2095 return true;
2096}
2097
3a7be554 2098static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
30562ab7
RH
2099{
2100 static gen_helper_gvec_2 * const fns[4] = {
2101 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103 };
2104
2105 if (sve_access_check(s)) {
2106 unsigned vsz = vec_full_reg_size(s);
2107 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108 vec_full_reg_offset(s, a->rn),
2109 vsz, vsz, 0, fns[a->esz]);
2110 }
2111 return true;
2112}
2113
3a7be554 2114static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
30562ab7
RH
2115{
2116 static gen_helper_gvec_3 * const fns[4] = {
2117 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119 };
2120
2121 if (sve_access_check(s)) {
2122 unsigned vsz = vec_full_reg_size(s);
2123 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124 vec_full_reg_offset(s, a->rn),
2125 vec_full_reg_offset(s, a->rm),
2126 vsz, vsz, 0, fns[a->esz]);
2127 }
2128 return true;
2129}
2130
3a7be554 2131static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
30562ab7
RH
2132{
2133 static gen_helper_gvec_2 * const fns[4][2] = {
2134 { NULL, NULL },
2135 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138 };
2139
2140 if (a->esz == 0) {
2141 return false;
2142 }
2143 if (sve_access_check(s)) {
2144 unsigned vsz = vec_full_reg_size(s);
2145 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146 vec_full_reg_offset(s, a->rn)
2147 + (a->h ? vsz / 2 : 0),
2148 vsz, vsz, 0, fns[a->esz][a->u]);
2149 }
2150 return true;
2151}
2152
d731d8cb
RH
2153/*
2154 *** SVE Permute - Predicates Group
2155 */
2156
2157static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158 gen_helper_gvec_3 *fn)
2159{
2160 if (!sve_access_check(s)) {
2161 return true;
2162 }
2163
2164 unsigned vsz = pred_full_reg_size(s);
2165
2166 /* Predicate sizes may be smaller and cannot use simd_desc.
2167 We cannot round up, as we do elsewhere, because we need
2168 the exact size for ZIP2 and REV. We retain the style for
2169 the other helpers for consistency. */
2170 TCGv_ptr t_d = tcg_temp_new_ptr();
2171 TCGv_ptr t_n = tcg_temp_new_ptr();
2172 TCGv_ptr t_m = tcg_temp_new_ptr();
2173 TCGv_i32 t_desc;
2174 int desc;
2175
2176 desc = vsz - 2;
2177 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183 t_desc = tcg_const_i32(desc);
2184
2185 fn(t_d, t_n, t_m, t_desc);
2186
2187 tcg_temp_free_ptr(t_d);
2188 tcg_temp_free_ptr(t_n);
2189 tcg_temp_free_ptr(t_m);
2190 tcg_temp_free_i32(t_desc);
2191 return true;
2192}
2193
2194static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195 gen_helper_gvec_2 *fn)
2196{
2197 if (!sve_access_check(s)) {
2198 return true;
2199 }
2200
2201 unsigned vsz = pred_full_reg_size(s);
2202 TCGv_ptr t_d = tcg_temp_new_ptr();
2203 TCGv_ptr t_n = tcg_temp_new_ptr();
2204 TCGv_i32 t_desc;
2205 int desc;
2206
2207 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210 /* Predicate sizes may be smaller and cannot use simd_desc.
2211 We cannot round up, as we do elsewhere, because we need
2212 the exact size for ZIP2 and REV. We retain the style for
2213 the other helpers for consistency. */
2214
2215 desc = vsz - 2;
2216 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218 t_desc = tcg_const_i32(desc);
2219
2220 fn(t_d, t_n, t_desc);
2221
2222 tcg_temp_free_i32(t_desc);
2223 tcg_temp_free_ptr(t_d);
2224 tcg_temp_free_ptr(t_n);
2225 return true;
2226}
2227
3a7be554 2228static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2229{
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231}
2232
3a7be554 2233static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2234{
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236}
2237
3a7be554 2238static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2239{
2240 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241}
2242
3a7be554 2243static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2244{
2245 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246}
2247
3a7be554 2248static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2249{
2250 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251}
2252
3a7be554 2253static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
d731d8cb
RH
2254{
2255 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256}
2257
3a7be554 2258static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
d731d8cb
RH
2259{
2260 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261}
2262
3a7be554 2263static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
d731d8cb
RH
2264{
2265 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266}
2267
3a7be554 2268static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
d731d8cb
RH
2269{
2270 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271}
2272
234b48e9
RH
2273/*
2274 *** SVE Permute - Interleaving Group
2275 */
2276
2277static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278{
2279 static gen_helper_gvec_3 * const fns[4] = {
2280 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282 };
2283
2284 if (sve_access_check(s)) {
2285 unsigned vsz = vec_full_reg_size(s);
2286 unsigned high_ofs = high ? vsz / 2 : 0;
2287 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288 vec_full_reg_offset(s, a->rn) + high_ofs,
2289 vec_full_reg_offset(s, a->rm) + high_ofs,
2290 vsz, vsz, 0, fns[a->esz]);
2291 }
2292 return true;
2293}
2294
2295static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296 gen_helper_gvec_3 *fn)
2297{
2298 if (sve_access_check(s)) {
2299 unsigned vsz = vec_full_reg_size(s);
2300 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301 vec_full_reg_offset(s, a->rn),
2302 vec_full_reg_offset(s, a->rm),
2303 vsz, vsz, data, fn);
2304 }
2305 return true;
2306}
2307
3a7be554 2308static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2309{
2310 return do_zip(s, a, false);
2311}
2312
3a7be554 2313static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2314{
2315 return do_zip(s, a, true);
2316}
2317
2318static gen_helper_gvec_3 * const uzp_fns[4] = {
2319 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321};
2322
3a7be554 2323static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2324{
2325 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326}
2327
3a7be554 2328static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2329{
2330 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331}
2332
2333static gen_helper_gvec_3 * const trn_fns[4] = {
2334 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336};
2337
3a7be554 2338static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2339{
2340 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341}
2342
3a7be554 2343static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
234b48e9
RH
2344{
2345 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346}
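
/*
 * Added note: the "2" variants differ from the "1" variants only in what
 * is handed to the out-of-line helper.  ZIP2 starts reading Zn/Zm at the
 * vsz/2 byte offset, while UZP2/TRN2 pass 1 << esz bytes as the helper's
 * data argument, which the helpers presumably use to start from the
 * odd-numbered lanes; each pair therefore shares a single helper table.
 */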
2347
3ca879ae
RH
2348/*
2349 *** SVE Permute Vector - Predicated Group
2350 */
2351
3a7be554 2352static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
3ca879ae
RH
2353{
2354 static gen_helper_gvec_3 * const fns[4] = {
2355 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356 };
2357 return do_zpz_ool(s, a, fns[a->esz]);
2358}
2359
ef23cb72
RH
2360/* Call the helper that computes the ARM LastActiveElement pseudocode
2361 * function, scaled by the element size. This includes the not found
2362 * indication; e.g. not found for esz=3 is -8.
2363 */
2364static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365{
2366 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2367 * round up, as we do elsewhere, because we need the exact size.
2368 */
2369 TCGv_ptr t_p = tcg_temp_new_ptr();
2370 TCGv_i32 t_desc;
2371 unsigned vsz = pred_full_reg_size(s);
2372 unsigned desc;
2373
2374 desc = vsz - 2;
2375 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2376
2377 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378 t_desc = tcg_const_i32(desc);
2379
2380 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382 tcg_temp_free_i32(t_desc);
2383 tcg_temp_free_ptr(t_p);
2384}
2385
2386/* Increment LAST to the offset of the next element in the vector,
2387 * wrapping around to 0.
2388 */
2389static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390{
2391 unsigned vsz = vec_full_reg_size(s);
2392
2393 tcg_gen_addi_i32(last, last, 1 << esz);
2394 if (is_power_of_2(vsz)) {
2395 tcg_gen_andi_i32(last, last, vsz - 1);
2396 } else {
2397 TCGv_i32 max = tcg_const_i32(vsz);
2398 TCGv_i32 zero = tcg_const_i32(0);
2399 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400 tcg_temp_free_i32(max);
2401 tcg_temp_free_i32(zero);
2402 }
2403}
2404
2405/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2406static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407{
2408 unsigned vsz = vec_full_reg_size(s);
2409
2410 if (is_power_of_2(vsz)) {
2411 tcg_gen_andi_i32(last, last, vsz - 1);
2412 } else {
2413 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414 TCGv_i32 zero = tcg_const_i32(0);
2415 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416 tcg_temp_free_i32(max);
2417 tcg_temp_free_i32(zero);
2418 }
2419}
2420
2421/* Load an unsigned element of ESZ from BASE+OFS. */
2422static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423{
2424 TCGv_i64 r = tcg_temp_new_i64();
2425
2426 switch (esz) {
2427 case 0:
2428 tcg_gen_ld8u_i64(r, base, ofs);
2429 break;
2430 case 1:
2431 tcg_gen_ld16u_i64(r, base, ofs);
2432 break;
2433 case 2:
2434 tcg_gen_ld32u_i64(r, base, ofs);
2435 break;
2436 case 3:
2437 tcg_gen_ld_i64(r, base, ofs);
2438 break;
2439 default:
2440 g_assert_not_reached();
2441 }
2442 return r;
2443}
2444
2445/* Load an unsigned element of ESZ from RM[LAST]. */
2446static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447 int rm, int esz)
2448{
2449 TCGv_ptr p = tcg_temp_new_ptr();
2450 TCGv_i64 r;
2451
 2452 /* Convert the offset within the vector register into an offset into ENV.
2453 * The final adjustment for the vector register base
2454 * is added via constant offset to the load.
2455 */
2456#ifdef HOST_WORDS_BIGENDIAN
2457 /* Adjust for element ordering. See vec_reg_offset. */
2458 if (esz < 3) {
2459 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460 }
2461#endif
2462 tcg_gen_ext_i32_ptr(p, last);
2463 tcg_gen_add_ptr(p, p, cpu_env);
2464
2465 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466 tcg_temp_free_ptr(p);
2467
2468 return r;
2469}
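
/*
 * Added note: the HOST_WORDS_BIGENDIAN adjustment mirrors
 * vec_reg_offset().  On a big-endian host the bytes of each 64-bit
 * storage unit are addressed in reverse, so e.g. an esz == 0 (byte)
 * index is XORed with 7 and an esz == 2 (word) index with 4 to land on
 * the correct element within its 8-byte chunk.
 */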
2470
2471/* Compute CLAST for a Zreg. */
2472static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473{
2474 TCGv_i32 last;
2475 TCGLabel *over;
2476 TCGv_i64 ele;
2477 unsigned vsz, esz = a->esz;
2478
2479 if (!sve_access_check(s)) {
2480 return true;
2481 }
2482
2483 last = tcg_temp_local_new_i32();
2484 over = gen_new_label();
2485
2486 find_last_active(s, last, esz, a->pg);
2487
2488 /* There is of course no movcond for a 2048-bit vector,
2489 * so we must branch over the actual store.
2490 */
2491 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493 if (!before) {
2494 incr_last_active(s, last, esz);
2495 }
2496
2497 ele = load_last_active(s, last, a->rm, esz);
2498 tcg_temp_free_i32(last);
2499
2500 vsz = vec_full_reg_size(s);
2501 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502 tcg_temp_free_i64(ele);
2503
2504 /* If this insn used MOVPRFX, we may need a second move. */
2505 if (a->rd != a->rn) {
2506 TCGLabel *done = gen_new_label();
2507 tcg_gen_br(done);
2508
2509 gen_set_label(over);
2510 do_mov_z(s, a->rd, a->rn);
2511
2512 gen_set_label(done);
2513 } else {
2514 gen_set_label(over);
2515 }
2516 return true;
2517}
2518
3a7be554 2519static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2520{
2521 return do_clast_vector(s, a, false);
2522}
2523
3a7be554 2524static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
ef23cb72
RH
2525{
2526 return do_clast_vector(s, a, true);
2527}
2528
2529/* Compute CLAST for a scalar. */
2530static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531 bool before, TCGv_i64 reg_val)
2532{
2533 TCGv_i32 last = tcg_temp_new_i32();
2534 TCGv_i64 ele, cmp, zero;
2535
2536 find_last_active(s, last, esz, pg);
2537
2538 /* Extend the original value of last prior to incrementing. */
2539 cmp = tcg_temp_new_i64();
2540 tcg_gen_ext_i32_i64(cmp, last);
2541
2542 if (!before) {
2543 incr_last_active(s, last, esz);
2544 }
2545
2546 /* The conceit here is that while last < 0 indicates not found, after
2547 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548 * from which we can load garbage. We then discard the garbage with
2549 * a conditional move.
2550 */
2551 ele = load_last_active(s, last, rm, esz);
2552 tcg_temp_free_i32(last);
2553
2554 zero = tcg_const_i64(0);
2555 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557 tcg_temp_free_i64(zero);
2558 tcg_temp_free_i64(cmp);
2559 tcg_temp_free_i64(ele);
2560}
2561
2562/* Compute CLAST for a Vreg. */
2563static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564{
2565 if (sve_access_check(s)) {
2566 int esz = a->esz;
2567 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571 write_fp_dreg(s, a->rd, reg);
2572 tcg_temp_free_i64(reg);
2573 }
2574 return true;
2575}
2576
3a7be554 2577static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2578{
2579 return do_clast_fp(s, a, false);
2580}
2581
3a7be554 2582static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2583{
2584 return do_clast_fp(s, a, true);
2585}
2586
2587/* Compute CLAST for a Xreg. */
2588static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589{
2590 TCGv_i64 reg;
2591
2592 if (!sve_access_check(s)) {
2593 return true;
2594 }
2595
2596 reg = cpu_reg(s, a->rd);
2597 switch (a->esz) {
2598 case 0:
2599 tcg_gen_ext8u_i64(reg, reg);
2600 break;
2601 case 1:
2602 tcg_gen_ext16u_i64(reg, reg);
2603 break;
2604 case 2:
2605 tcg_gen_ext32u_i64(reg, reg);
2606 break;
2607 case 3:
2608 break;
2609 default:
2610 g_assert_not_reached();
2611 }
2612
2613 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614 return true;
2615}
2616
3a7be554 2617static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2618{
2619 return do_clast_general(s, a, false);
2620}
2621
3a7be554 2622static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2623{
2624 return do_clast_general(s, a, true);
2625}
2626
2627/* Compute LAST for a scalar. */
2628static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629 int pg, int rm, bool before)
2630{
2631 TCGv_i32 last = tcg_temp_new_i32();
2632 TCGv_i64 ret;
2633
2634 find_last_active(s, last, esz, pg);
2635 if (before) {
2636 wrap_last_active(s, last, esz);
2637 } else {
2638 incr_last_active(s, last, esz);
2639 }
2640
2641 ret = load_last_active(s, last, rm, esz);
2642 tcg_temp_free_i32(last);
2643 return ret;
2644}
2645
2646/* Compute LAST for a Vreg. */
2647static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648{
2649 if (sve_access_check(s)) {
2650 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651 write_fp_dreg(s, a->rd, val);
2652 tcg_temp_free_i64(val);
2653 }
2654 return true;
2655}
2656
3a7be554 2657static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2658{
2659 return do_last_fp(s, a, false);
2660}
2661
3a7be554 2662static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2663{
2664 return do_last_fp(s, a, true);
2665}
2666
2667/* Compute LAST for a Xreg. */
2668static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669{
2670 if (sve_access_check(s)) {
2671 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673 tcg_temp_free_i64(val);
2674 }
2675 return true;
2676}
2677
3a7be554 2678static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2679{
2680 return do_last_general(s, a, false);
2681}
2682
3a7be554 2683static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
ef23cb72
RH
2684{
2685 return do_last_general(s, a, true);
2686}
2687
3a7be554 2688static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2689{
2690 if (sve_access_check(s)) {
2691 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692 }
2693 return true;
2694}
2695
3a7be554 2696static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2697{
2698 if (sve_access_check(s)) {
2699 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702 tcg_temp_free_i64(t);
2703 }
2704 return true;
2705}
2706
3a7be554 2707static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2708{
2709 static gen_helper_gvec_3 * const fns[4] = {
2710 NULL,
2711 gen_helper_sve_revb_h,
2712 gen_helper_sve_revb_s,
2713 gen_helper_sve_revb_d,
2714 };
2715 return do_zpz_ool(s, a, fns[a->esz]);
2716}
2717
3a7be554 2718static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2719{
2720 static gen_helper_gvec_3 * const fns[4] = {
2721 NULL,
2722 NULL,
2723 gen_helper_sve_revh_s,
2724 gen_helper_sve_revh_d,
2725 };
2726 return do_zpz_ool(s, a, fns[a->esz]);
2727}
2728
3a7be554 2729static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2730{
2731 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732}
2733
3a7be554 2734static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
dae8fb90
RH
2735{
2736 static gen_helper_gvec_3 * const fns[4] = {
2737 gen_helper_sve_rbit_b,
2738 gen_helper_sve_rbit_h,
2739 gen_helper_sve_rbit_s,
2740 gen_helper_sve_rbit_d,
2741 };
2742 return do_zpz_ool(s, a, fns[a->esz]);
2743}
2744
3a7be554 2745static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2746{
2747 if (sve_access_check(s)) {
2748 unsigned vsz = vec_full_reg_size(s);
2749 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750 vec_full_reg_offset(s, a->rn),
2751 vec_full_reg_offset(s, a->rm),
2752 pred_full_reg_offset(s, a->pg),
2753 vsz, vsz, a->esz, gen_helper_sve_splice);
2754 }
2755 return true;
2756}
2757
757f9cff
RH
2758/*
2759 *** SVE Integer Compare - Vectors Group
2760 */
2761
2762static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763 gen_helper_gvec_flags_4 *gen_fn)
2764{
2765 TCGv_ptr pd, zn, zm, pg;
2766 unsigned vsz;
2767 TCGv_i32 t;
2768
2769 if (gen_fn == NULL) {
2770 return false;
2771 }
2772 if (!sve_access_check(s)) {
2773 return true;
2774 }
2775
2776 vsz = vec_full_reg_size(s);
2777 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778 pd = tcg_temp_new_ptr();
2779 zn = tcg_temp_new_ptr();
2780 zm = tcg_temp_new_ptr();
2781 pg = tcg_temp_new_ptr();
2782
2783 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
2788 gen_fn(t, pd, zn, zm, pg, t);
2789
2790 tcg_temp_free_ptr(pd);
2791 tcg_temp_free_ptr(zn);
2792 tcg_temp_free_ptr(zm);
2793 tcg_temp_free_ptr(pg);
2794
2795 do_pred_flags(t);
2796
2797 tcg_temp_free_i32(t);
2798 return true;
2799}
2800
2801#define DO_PPZZ(NAME, name) \
3a7be554 2802static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2803{ \
2804 static gen_helper_gvec_flags_4 * const fns[4] = { \
2805 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2806 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2807 }; \
2808 return do_ppzz_flags(s, a, fns[a->esz]); \
2809}
2810
2811DO_PPZZ(CMPEQ, cmpeq)
2812DO_PPZZ(CMPNE, cmpne)
2813DO_PPZZ(CMPGT, cmpgt)
2814DO_PPZZ(CMPGE, cmpge)
2815DO_PPZZ(CMPHI, cmphi)
2816DO_PPZZ(CMPHS, cmphs)
2817
2818#undef DO_PPZZ
2819
2820#define DO_PPZW(NAME, name) \
3a7be554 2821static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
757f9cff
RH
2822{ \
2823 static gen_helper_gvec_flags_4 * const fns[4] = { \
2824 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2825 gen_helper_sve_##name##_ppzw_s, NULL \
2826 }; \
2827 return do_ppzz_flags(s, a, fns[a->esz]); \
2828}
2829
2830DO_PPZW(CMPEQ, cmpeq)
2831DO_PPZW(CMPNE, cmpne)
2832DO_PPZW(CMPGT, cmpgt)
2833DO_PPZW(CMPGE, cmpge)
2834DO_PPZW(CMPHI, cmphi)
2835DO_PPZW(CMPHS, cmphs)
2836DO_PPZW(CMPLT, cmplt)
2837DO_PPZW(CMPLE, cmple)
2838DO_PPZW(CMPLO, cmplo)
2839DO_PPZW(CMPLS, cmpls)
2840
2841#undef DO_PPZW
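
/*
 * Added note: the "wide" (ppzw) compares treat Zm as a vector of 64-bit
 * elements compared against each narrower element of Zn, which is why
 * the fns[] tables above leave the MO_64 slot NULL (a 64-bit vs 64-bit
 * compare is already covered by the ppzz forms) and why the extra
 * LT/LE/LO/LS variants appear only here and in the immediate forms below.
 */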
2842
38cadeba
RH
2843/*
2844 *** SVE Integer Compare - Immediate Groups
2845 */
2846
2847static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2848 gen_helper_gvec_flags_3 *gen_fn)
2849{
2850 TCGv_ptr pd, zn, pg;
2851 unsigned vsz;
2852 TCGv_i32 t;
2853
2854 if (gen_fn == NULL) {
2855 return false;
2856 }
2857 if (!sve_access_check(s)) {
2858 return true;
2859 }
2860
2861 vsz = vec_full_reg_size(s);
2862 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2863 pd = tcg_temp_new_ptr();
2864 zn = tcg_temp_new_ptr();
2865 pg = tcg_temp_new_ptr();
2866
2867 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2868 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2869 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2870
2871 gen_fn(t, pd, zn, pg, t);
2872
2873 tcg_temp_free_ptr(pd);
2874 tcg_temp_free_ptr(zn);
2875 tcg_temp_free_ptr(pg);
2876
2877 do_pred_flags(t);
2878
2879 tcg_temp_free_i32(t);
2880 return true;
2881}
2882
2883#define DO_PPZI(NAME, name) \
3a7be554 2884static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
38cadeba
RH
2885{ \
2886 static gen_helper_gvec_flags_3 * const fns[4] = { \
2887 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2888 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2889 }; \
2890 return do_ppzi_flags(s, a, fns[a->esz]); \
2891}
2892
2893DO_PPZI(CMPEQ, cmpeq)
2894DO_PPZI(CMPNE, cmpne)
2895DO_PPZI(CMPGT, cmpgt)
2896DO_PPZI(CMPGE, cmpge)
2897DO_PPZI(CMPHI, cmphi)
2898DO_PPZI(CMPHS, cmphs)
2899DO_PPZI(CMPLT, cmplt)
2900DO_PPZI(CMPLE, cmple)
2901DO_PPZI(CMPLO, cmplo)
2902DO_PPZI(CMPLS, cmpls)
2903
2904#undef DO_PPZI
2905
35da316f
RH
2906/*
2907 *** SVE Partition Break Group
2908 */
2909
2910static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2911 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2912{
2913 if (!sve_access_check(s)) {
2914 return true;
2915 }
2916
2917 unsigned vsz = pred_full_reg_size(s);
2918
2919 /* Predicate sizes may be smaller and cannot use simd_desc. */
2920 TCGv_ptr d = tcg_temp_new_ptr();
2921 TCGv_ptr n = tcg_temp_new_ptr();
2922 TCGv_ptr m = tcg_temp_new_ptr();
2923 TCGv_ptr g = tcg_temp_new_ptr();
2924 TCGv_i32 t = tcg_const_i32(vsz - 2);
2925
2926 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2927 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2928 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2929 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2930
2931 if (a->s) {
2932 fn_s(t, d, n, m, g, t);
2933 do_pred_flags(t);
2934 } else {
2935 fn(d, n, m, g, t);
2936 }
2937 tcg_temp_free_ptr(d);
2938 tcg_temp_free_ptr(n);
2939 tcg_temp_free_ptr(m);
2940 tcg_temp_free_ptr(g);
2941 tcg_temp_free_i32(t);
2942 return true;
2943}
2944
2945static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2946 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2947{
2948 if (!sve_access_check(s)) {
2949 return true;
2950 }
2951
2952 unsigned vsz = pred_full_reg_size(s);
2953
2954 /* Predicate sizes may be smaller and cannot use simd_desc. */
2955 TCGv_ptr d = tcg_temp_new_ptr();
2956 TCGv_ptr n = tcg_temp_new_ptr();
2957 TCGv_ptr g = tcg_temp_new_ptr();
2958 TCGv_i32 t = tcg_const_i32(vsz - 2);
2959
2960 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2961 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2962 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2963
2964 if (a->s) {
2965 fn_s(t, d, n, g, t);
2966 do_pred_flags(t);
2967 } else {
2968 fn(d, n, g, t);
2969 }
2970 tcg_temp_free_ptr(d);
2971 tcg_temp_free_ptr(n);
2972 tcg_temp_free_ptr(g);
2973 tcg_temp_free_i32(t);
2974 return true;
2975}
2976
3a7be554 2977static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2978{
2979 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2980}
2981
3a7be554 2982static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
35da316f
RH
2983{
2984 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2985}
2986
3a7be554 2987static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2988{
2989 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2990}
2991
3a7be554 2992static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2993{
2994 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2995}
2996
3a7be554 2997static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
2998{
2999 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3000}
3001
3a7be554 3002static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3003{
3004 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3005}
3006
3a7be554 3007static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
35da316f
RH
3008{
3009 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3010}
3011
9ee3a611
RH
3012/*
3013 *** SVE Predicate Count Group
3014 */
3015
3016static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3017{
3018 unsigned psz = pred_full_reg_size(s);
3019
3020 if (psz <= 8) {
3021 uint64_t psz_mask;
3022
3023 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3024 if (pn != pg) {
3025 TCGv_i64 g = tcg_temp_new_i64();
3026 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3027 tcg_gen_and_i64(val, val, g);
3028 tcg_temp_free_i64(g);
3029 }
3030
3031 /* Reduce the pred_esz_masks value simply to reduce the
3032 * size of the code generated here.
3033 */
3034 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3035 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3036
3037 tcg_gen_ctpop_i64(val, val);
3038 } else {
3039 TCGv_ptr t_pn = tcg_temp_new_ptr();
3040 TCGv_ptr t_pg = tcg_temp_new_ptr();
3041 unsigned desc;
3042 TCGv_i32 t_desc;
3043
3044 desc = psz - 2;
3045 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3046
3047 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3048 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3049 t_desc = tcg_const_i32(desc);
3050
3051 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3052 tcg_temp_free_ptr(t_pn);
3053 tcg_temp_free_ptr(t_pg);
3054 tcg_temp_free_i32(t_desc);
3055 }
3056}
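
/*
 * Worked example (added for illustration): with a 256-bit vector,
 * psz == 4, so the inline path applies.  For esz == MO_32 each element
 * owns 4 predicate bits with the active flag in the lowest one, so the
 * combined mask is pred_esz_masks[MO_32] & MAKE_64BIT_MASK(0, 32)
 * == 0x11111111, and the ctpop then yields the number of active
 * 32-bit elements directly.
 */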
3057
3a7be554 3058static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3059{
3060 if (sve_access_check(s)) {
3061 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3062 }
3063 return true;
3064}
3065
3a7be554 3066static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3067{
3068 if (sve_access_check(s)) {
3069 TCGv_i64 reg = cpu_reg(s, a->rd);
3070 TCGv_i64 val = tcg_temp_new_i64();
3071
3072 do_cntp(s, val, a->esz, a->pg, a->pg);
3073 if (a->d) {
3074 tcg_gen_sub_i64(reg, reg, val);
3075 } else {
3076 tcg_gen_add_i64(reg, reg, val);
3077 }
3078 tcg_temp_free_i64(val);
3079 }
3080 return true;
3081}
3082
3a7be554 3083static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3084{
3085 if (a->esz == 0) {
3086 return false;
3087 }
3088 if (sve_access_check(s)) {
3089 unsigned vsz = vec_full_reg_size(s);
3090 TCGv_i64 val = tcg_temp_new_i64();
3091 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3092
3093 do_cntp(s, val, a->esz, a->pg, a->pg);
3094 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3095 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3096 }
3097 return true;
3098}
3099
3a7be554 3100static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3101{
3102 if (sve_access_check(s)) {
3103 TCGv_i64 reg = cpu_reg(s, a->rd);
3104 TCGv_i64 val = tcg_temp_new_i64();
3105
3106 do_cntp(s, val, a->esz, a->pg, a->pg);
3107 do_sat_addsub_32(reg, val, a->u, a->d);
3108 }
3109 return true;
3110}
3111
3a7be554 3112static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3113{
3114 if (sve_access_check(s)) {
3115 TCGv_i64 reg = cpu_reg(s, a->rd);
3116 TCGv_i64 val = tcg_temp_new_i64();
3117
3118 do_cntp(s, val, a->esz, a->pg, a->pg);
3119 do_sat_addsub_64(reg, val, a->u, a->d);
3120 }
3121 return true;
3122}
3123
3a7be554 3124static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3125{
3126 if (a->esz == 0) {
3127 return false;
3128 }
3129 if (sve_access_check(s)) {
3130 TCGv_i64 val = tcg_temp_new_i64();
3131 do_cntp(s, val, a->esz, a->pg, a->pg);
3132 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3133 }
3134 return true;
3135}
3136
caf1cefc
RH
3137/*
3138 *** SVE Integer Compare Scalars Group
3139 */
3140
3a7be554 3141static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
caf1cefc
RH
3142{
3143 if (!sve_access_check(s)) {
3144 return true;
3145 }
3146
3147 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3148 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3149 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3150 TCGv_i64 cmp = tcg_temp_new_i64();
3151
3152 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3153 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3154 tcg_temp_free_i64(cmp);
3155
3156 /* VF = !NF & !CF. */
3157 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3158 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3159
3160 /* Both NF and VF actually look at bit 31. */
3161 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3162 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3163 return true;
3164}
3165
3a7be554 3166static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
caf1cefc 3167{
bbd0968c 3168 TCGv_i64 op0, op1, t0, t1, tmax;
caf1cefc
RH
3169 TCGv_i32 t2, t3;
3170 TCGv_ptr ptr;
3171 unsigned desc, vsz = vec_full_reg_size(s);
3172 TCGCond cond;
3173
bbd0968c
RH
3174 if (!sve_access_check(s)) {
3175 return true;
3176 }
3177
3178 op0 = read_cpu_reg(s, a->rn, 1);
3179 op1 = read_cpu_reg(s, a->rm, 1);
3180
caf1cefc
RH
3181 if (!a->sf) {
3182 if (a->u) {
3183 tcg_gen_ext32u_i64(op0, op0);
3184 tcg_gen_ext32u_i64(op1, op1);
3185 } else {
3186 tcg_gen_ext32s_i64(op0, op0);
3187 tcg_gen_ext32s_i64(op1, op1);
3188 }
3189 }
3190
3191 /* For the helper, compress the different conditions into a computation
 3192 * of the number of iterations for which the condition is true.
caf1cefc 3193 */
bbd0968c
RH
3194 t0 = tcg_temp_new_i64();
3195 t1 = tcg_temp_new_i64();
caf1cefc
RH
3196 tcg_gen_sub_i64(t0, op1, op0);
3197
bbd0968c 3198 tmax = tcg_const_i64(vsz >> a->esz);
caf1cefc
RH
3199 if (a->eq) {
3200 /* Equality means one more iteration. */
3201 tcg_gen_addi_i64(t0, t0, 1);
bbd0968c
RH
3202
 3203 /* If op1 is the maximum (un)signed integer (which is the only case
 3204 * in which the addition above could overflow), then we produce an
 3205 * all-true predicate by setting the count to the vector length.
 3206 * This is because the pseudocode is described as an increment +
 3207 * compare loop, and the maximum integer would always compare true.
3208 */
3209 tcg_gen_movi_i64(t1, (a->sf
3210 ? (a->u ? UINT64_MAX : INT64_MAX)
3211 : (a->u ? UINT32_MAX : INT32_MAX)));
3212 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
caf1cefc
RH
3213 }
3214
bbd0968c
RH
3215 /* Bound to the maximum. */
3216 tcg_gen_umin_i64(t0, t0, tmax);
3217 tcg_temp_free_i64(tmax);
3218
3219 /* Set the count to zero if the condition is false. */
caf1cefc
RH
3220 cond = (a->u
3221 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3222 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3223 tcg_gen_movi_i64(t1, 0);
3224 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
bbd0968c 3225 tcg_temp_free_i64(t1);
caf1cefc 3226
bbd0968c 3227 /* Since we're bounded, pass as a 32-bit type. */
caf1cefc
RH
3228 t2 = tcg_temp_new_i32();
3229 tcg_gen_extrl_i64_i32(t2, t0);
3230 tcg_temp_free_i64(t0);
bbd0968c
RH
3231
3232 /* Scale elements to bits. */
3233 tcg_gen_shli_i32(t2, t2, a->esz);
caf1cefc
RH
3234
3235 desc = (vsz / 8) - 2;
3236 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3237 t3 = tcg_const_i32(desc);
3238
3239 ptr = tcg_temp_new_ptr();
3240 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3241
3242 gen_helper_sve_while(t2, ptr, t2, t3);
3243 do_pred_flags(t2);
3244
3245 tcg_temp_free_ptr(ptr);
3246 tcg_temp_free_i32(t2);
3247 tcg_temp_free_i32(t3);
3248 return true;
3249}
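
/*
 * Worked example (added for illustration): for a WHILELT with op0 == 3,
 * op1 == 7, esz == MO_32 and a 256-bit vector, t0 == 7 - 3 == 4
 * iterations (no +1 since eq is false), tmax == 32 >> 2 == 8 so no
 * clamping, the LT condition holds so the count survives the movcond,
 * and the helper is asked to activate the first 4 word-sized elements
 * of Pd.
 */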
3250
ed491961
RH
3251/*
3252 *** SVE Integer Wide Immediate - Unpredicated Group
3253 */
3254
3a7be554 3255static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
ed491961
RH
3256{
3257 if (a->esz == 0) {
3258 return false;
3259 }
3260 if (sve_access_check(s)) {
3261 unsigned vsz = vec_full_reg_size(s);
3262 int dofs = vec_full_reg_offset(s, a->rd);
3263 uint64_t imm;
3264
3265 /* Decode the VFP immediate. */
3266 imm = vfp_expand_imm(a->esz, a->imm);
8711e71f 3267 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
ed491961
RH
3268 }
3269 return true;
3270}
3271
3a7be554 3272static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
ed491961 3273{
3a7be554 3274 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
ed491961
RH
3275 return false;
3276 }
3277 if (sve_access_check(s)) {
3278 unsigned vsz = vec_full_reg_size(s);
3279 int dofs = vec_full_reg_offset(s, a->rd);
3280
8711e71f 3281 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
ed491961
RH
3282 }
3283 return true;
3284}
3285
3a7be554 3286static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3287{
3a7be554 3288 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3289 return false;
3290 }
3291 if (sve_access_check(s)) {
3292 unsigned vsz = vec_full_reg_size(s);
3293 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3294 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3295 }
3296 return true;
3297}
3298
3a7be554 3299static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3300{
3301 a->imm = -a->imm;
3a7be554 3302 return trans_ADD_zzi(s, a);
6e6a157d
RH
3303}
3304
3a7be554 3305static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3306{
53229a77 3307 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
6e6a157d
RH
3308 static const GVecGen2s op[4] = {
3309 { .fni8 = tcg_gen_vec_sub8_i64,
3310 .fniv = tcg_gen_sub_vec,
3311 .fno = gen_helper_sve_subri_b,
53229a77 3312 .opt_opc = vecop_list,
6e6a157d
RH
3313 .vece = MO_8,
3314 .scalar_first = true },
3315 { .fni8 = tcg_gen_vec_sub16_i64,
3316 .fniv = tcg_gen_sub_vec,
3317 .fno = gen_helper_sve_subri_h,
53229a77 3318 .opt_opc = vecop_list,
6e6a157d
RH
3319 .vece = MO_16,
3320 .scalar_first = true },
3321 { .fni4 = tcg_gen_sub_i32,
3322 .fniv = tcg_gen_sub_vec,
3323 .fno = gen_helper_sve_subri_s,
53229a77 3324 .opt_opc = vecop_list,
6e6a157d
RH
3325 .vece = MO_32,
3326 .scalar_first = true },
3327 { .fni8 = tcg_gen_sub_i64,
3328 .fniv = tcg_gen_sub_vec,
3329 .fno = gen_helper_sve_subri_d,
53229a77 3330 .opt_opc = vecop_list,
6e6a157d
RH
3331 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332 .vece = MO_64,
3333 .scalar_first = true }
3334 };
3335
3a7be554 3336 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3337 return false;
3338 }
3339 if (sve_access_check(s)) {
3340 unsigned vsz = vec_full_reg_size(s);
3341 TCGv_i64 c = tcg_const_i64(a->imm);
3342 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343 vec_full_reg_offset(s, a->rn),
3344 vsz, vsz, c, &op[a->esz]);
3345 tcg_temp_free_i64(c);
3346 }
3347 return true;
3348}
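
/*
 * Added note: .scalar_first = true makes tcg_gen_gvec_2s() compute
 * imm - Zn.<T> rather than Zn.<T> - imm, which is the reversed subtract
 * SUBR requires; the .fno entries provide the out-of-line fallback when
 * no suitable host vector subtract is available.
 */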
3349
3a7be554 3350static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3351{
3352 if (sve_access_check(s)) {
3353 unsigned vsz = vec_full_reg_size(s);
3354 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356 }
3357 return true;
3358}
3359
3a7be554 3360static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3361{
3a7be554 3362 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3363 return false;
3364 }
3365 if (sve_access_check(s)) {
3366 TCGv_i64 val = tcg_const_i64(a->imm);
3367 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3368 tcg_temp_free_i64(val);
3369 }
3370 return true;
3371}
3372
3a7be554 3373static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3374{
3a7be554 3375 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3376}
3377
3a7be554 3378static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3379{
3a7be554 3380 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3381}
3382
3a7be554 3383static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3384{
3a7be554 3385 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3386}
3387
3a7be554 3388static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3389{
3a7be554 3390 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3391}
3392
3393static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3394{
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 TCGv_i64 c = tcg_const_i64(a->imm);
3398
3399 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3400 vec_full_reg_offset(s, a->rn),
3401 c, vsz, vsz, 0, fn);
3402 tcg_temp_free_i64(c);
3403 }
3404 return true;
3405}
3406
3407#define DO_ZZI(NAME, name) \
3a7be554 3408static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3409{ \
3410 static gen_helper_gvec_2i * const fns[4] = { \
3411 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3412 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3413 }; \
3414 return do_zzi_ool(s, a, fns[a->esz]); \
3415}
3416
3417DO_ZZI(SMAX, smax)
3418DO_ZZI(UMAX, umax)
3419DO_ZZI(SMIN, smin)
3420DO_ZZI(UMIN, umin)
3421
3422#undef DO_ZZI
3423
3a7be554 3424static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3425{
3426 static gen_helper_gvec_3 * const fns[2][2] = {
3427 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3428 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3429 };
3430
3431 if (sve_access_check(s)) {
3432 unsigned vsz = vec_full_reg_size(s);
3433 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3434 vec_full_reg_offset(s, a->rn),
3435 vec_full_reg_offset(s, a->rm),
3436 vsz, vsz, 0, fns[a->u][a->sz]);
3437 }
3438 return true;
3439}
3440
3a7be554 3441static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3442{
3443 static gen_helper_gvec_3 * const fns[2][2] = {
3444 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3445 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3446 };
3447
3448 if (sve_access_check(s)) {
3449 unsigned vsz = vec_full_reg_size(s);
3450 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3451 vec_full_reg_offset(s, a->rn),
3452 vec_full_reg_offset(s, a->rm),
3453 vsz, vsz, a->index, fns[a->u][a->sz]);
3454 }
3455 return true;
3456}
3457
3458
ca40a6e6
RH
3459/*
3460 *** SVE Floating Point Multiply-Add Indexed Group
3461 */
3462
3a7be554 3463static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3464{
3465 static gen_helper_gvec_4_ptr * const fns[3] = {
3466 gen_helper_gvec_fmla_idx_h,
3467 gen_helper_gvec_fmla_idx_s,
3468 gen_helper_gvec_fmla_idx_d,
3469 };
3470
3471 if (sve_access_check(s)) {
3472 unsigned vsz = vec_full_reg_size(s);
3473 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3474 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3475 vec_full_reg_offset(s, a->rn),
3476 vec_full_reg_offset(s, a->rm),
3477 vec_full_reg_offset(s, a->ra),
3478 status, vsz, vsz, (a->index << 1) | a->sub,
3479 fns[a->esz - 1]);
3480 tcg_temp_free_ptr(status);
3481 }
3482 return true;
3483}
3484
3485/*
3486 *** SVE Floating Point Multiply Indexed Group
3487 */
3488
3a7be554 3489static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3490{
3491 static gen_helper_gvec_3_ptr * const fns[3] = {
3492 gen_helper_gvec_fmul_idx_h,
3493 gen_helper_gvec_fmul_idx_s,
3494 gen_helper_gvec_fmul_idx_d,
3495 };
3496
3497 if (sve_access_check(s)) {
3498 unsigned vsz = vec_full_reg_size(s);
3499 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3500 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3501 vec_full_reg_offset(s, a->rn),
3502 vec_full_reg_offset(s, a->rm),
3503 status, vsz, vsz, a->index, fns[a->esz - 1]);
3504 tcg_temp_free_ptr(status);
3505 }
3506 return true;
3507}
3508
23fbe79f
RH
3509/*
3510 *** SVE Floating Point Fast Reduction Group
3511 */
3512
3513typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3514 TCGv_ptr, TCGv_i32);
3515
3516static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3517 gen_helper_fp_reduce *fn)
3518{
3519 unsigned vsz = vec_full_reg_size(s);
3520 unsigned p2vsz = pow2ceil(vsz);
3521 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3522 TCGv_ptr t_zn, t_pg, status;
3523 TCGv_i64 temp;
3524
3525 temp = tcg_temp_new_i64();
3526 t_zn = tcg_temp_new_ptr();
3527 t_pg = tcg_temp_new_ptr();
3528
3529 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3530 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3531 status = get_fpstatus_ptr(a->esz == MO_16);
3532
3533 fn(temp, t_zn, t_pg, status, t_desc);
3534 tcg_temp_free_ptr(t_zn);
3535 tcg_temp_free_ptr(t_pg);
3536 tcg_temp_free_ptr(status);
3537 tcg_temp_free_i32(t_desc);
3538
3539 write_fp_dreg(s, a->rd, temp);
3540 tcg_temp_free_i64(temp);
3541}
3542
3543#define DO_VPZ(NAME, name) \
3a7be554 3544static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3545{ \
3546 static gen_helper_fp_reduce * const fns[3] = { \
3547 gen_helper_sve_##name##_h, \
3548 gen_helper_sve_##name##_s, \
3549 gen_helper_sve_##name##_d, \
3550 }; \
3551 if (a->esz == 0) { \
3552 return false; \
3553 } \
3554 if (sve_access_check(s)) { \
3555 do_reduce(s, a, fns[a->esz - 1]); \
3556 } \
3557 return true; \
3558}
3559
3560DO_VPZ(FADDV, faddv)
3561DO_VPZ(FMINNMV, fminnmv)
3562DO_VPZ(FMAXNMV, fmaxnmv)
3563DO_VPZ(FMINV, fminv)
3564DO_VPZ(FMAXV, fmaxv)
3565
3887c038
RH
3566/*
3567 *** SVE Floating Point Unary Operations - Unpredicated Group
3568 */
3569
3570static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3571{
3572 unsigned vsz = vec_full_reg_size(s);
3573 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3574
3575 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3576 vec_full_reg_offset(s, a->rn),
3577 status, vsz, vsz, 0, fn);
3578 tcg_temp_free_ptr(status);
3579}
3580
3a7be554 3581static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3582{
3583 static gen_helper_gvec_2_ptr * const fns[3] = {
3584 gen_helper_gvec_frecpe_h,
3585 gen_helper_gvec_frecpe_s,
3586 gen_helper_gvec_frecpe_d,
3587 };
3588 if (a->esz == 0) {
3589 return false;
3590 }
3591 if (sve_access_check(s)) {
3592 do_zz_fp(s, a, fns[a->esz - 1]);
3593 }
3594 return true;
3595}
3596
3a7be554 3597static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3598{
3599 static gen_helper_gvec_2_ptr * const fns[3] = {
3600 gen_helper_gvec_frsqrte_h,
3601 gen_helper_gvec_frsqrte_s,
3602 gen_helper_gvec_frsqrte_d,
3603 };
3604 if (a->esz == 0) {
3605 return false;
3606 }
3607 if (sve_access_check(s)) {
3608 do_zz_fp(s, a, fns[a->esz - 1]);
3609 }
3610 return true;
3611}
3612
4d2e2a03
RH
3613/*
3614 *** SVE Floating Point Compare with Zero Group
3615 */
3616
3617static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3618 gen_helper_gvec_3_ptr *fn)
3619{
3620 unsigned vsz = vec_full_reg_size(s);
3621 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3622
3623 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3624 vec_full_reg_offset(s, a->rn),
3625 pred_full_reg_offset(s, a->pg),
3626 status, vsz, vsz, 0, fn);
3627 tcg_temp_free_ptr(status);
3628}
3629
3630#define DO_PPZ(NAME, name) \
3a7be554 3631static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3632{ \
3633 static gen_helper_gvec_3_ptr * const fns[3] = { \
3634 gen_helper_sve_##name##_h, \
3635 gen_helper_sve_##name##_s, \
3636 gen_helper_sve_##name##_d, \
3637 }; \
3638 if (a->esz == 0) { \
3639 return false; \
3640 } \
3641 if (sve_access_check(s)) { \
3642 do_ppz_fp(s, a, fns[a->esz - 1]); \
3643 } \
3644 return true; \
3645}
3646
3647DO_PPZ(FCMGE_ppz0, fcmge0)
3648DO_PPZ(FCMGT_ppz0, fcmgt0)
3649DO_PPZ(FCMLE_ppz0, fcmle0)
3650DO_PPZ(FCMLT_ppz0, fcmlt0)
3651DO_PPZ(FCMEQ_ppz0, fcmeq0)
3652DO_PPZ(FCMNE_ppz0, fcmne0)
3653
3654#undef DO_PPZ
3655
67fcd9ad
RH
3656/*
3657 *** SVE floating-point trig multiply-add coefficient
3658 */
3659
3a7be554 3660static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3661{
3662 static gen_helper_gvec_3_ptr * const fns[3] = {
3663 gen_helper_sve_ftmad_h,
3664 gen_helper_sve_ftmad_s,
3665 gen_helper_sve_ftmad_d,
3666 };
3667
3668 if (a->esz == 0) {
3669 return false;
3670 }
3671 if (sve_access_check(s)) {
3672 unsigned vsz = vec_full_reg_size(s);
3673 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3674 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3675 vec_full_reg_offset(s, a->rn),
3676 vec_full_reg_offset(s, a->rm),
3677 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3678 tcg_temp_free_ptr(status);
3679 }
3680 return true;
3681}
3682
7f9ddf64
RH
3683/*
3684 *** SVE Floating Point Accumulating Reduction Group
3685 */
3686
3a7be554 3687static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3688{
3689 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3690 TCGv_ptr, TCGv_ptr, TCGv_i32);
3691 static fadda_fn * const fns[3] = {
3692 gen_helper_sve_fadda_h,
3693 gen_helper_sve_fadda_s,
3694 gen_helper_sve_fadda_d,
3695 };
3696 unsigned vsz = vec_full_reg_size(s);
3697 TCGv_ptr t_rm, t_pg, t_fpst;
3698 TCGv_i64 t_val;
3699 TCGv_i32 t_desc;
3700
3701 if (a->esz == 0) {
3702 return false;
3703 }
3704 if (!sve_access_check(s)) {
3705 return true;
3706 }
3707
3708 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3709 t_rm = tcg_temp_new_ptr();
3710 t_pg = tcg_temp_new_ptr();
3711 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3712 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3713 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3714 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3715
3716 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3717
3718 tcg_temp_free_i32(t_desc);
3719 tcg_temp_free_ptr(t_fpst);
3720 tcg_temp_free_ptr(t_pg);
3721 tcg_temp_free_ptr(t_rm);
3722
3723 write_fp_dreg(s, a->rd, t_val);
3724 tcg_temp_free_i64(t_val);
3725 return true;
3726}
3727
29b80469
RH
3728/*
3729 *** SVE Floating Point Arithmetic - Unpredicated Group
3730 */
3731
3732static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3733 gen_helper_gvec_3_ptr *fn)
3734{
3735 if (fn == NULL) {
3736 return false;
3737 }
3738 if (sve_access_check(s)) {
3739 unsigned vsz = vec_full_reg_size(s);
3740 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3741 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3742 vec_full_reg_offset(s, a->rn),
3743 vec_full_reg_offset(s, a->rm),
3744 status, vsz, vsz, 0, fn);
3745 tcg_temp_free_ptr(status);
3746 }
3747 return true;
3748}
3749
3750
3751#define DO_FP3(NAME, name) \
3a7be554 3752static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3753{ \
3754 static gen_helper_gvec_3_ptr * const fns[4] = { \
3755 NULL, gen_helper_gvec_##name##_h, \
3756 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3757 }; \
3758 return do_zzz_fp(s, a, fns[a->esz]); \
3759}
3760
3761DO_FP3(FADD_zzz, fadd)
3762DO_FP3(FSUB_zzz, fsub)
3763DO_FP3(FMUL_zzz, fmul)
3764DO_FP3(FTSMUL, ftsmul)
3765DO_FP3(FRECPS, recps)
3766DO_FP3(FRSQRTS, rsqrts)
3767
3768#undef DO_FP3
3769
ec3b87c2
RH
3770/*
3771 *** SVE Floating Point Arithmetic - Predicated Group
3772 */
3773
3774static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3775 gen_helper_gvec_4_ptr *fn)
3776{
3777 if (fn == NULL) {
3778 return false;
3779 }
3780 if (sve_access_check(s)) {
3781 unsigned vsz = vec_full_reg_size(s);
3782 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3783 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3784 vec_full_reg_offset(s, a->rn),
3785 vec_full_reg_offset(s, a->rm),
3786 pred_full_reg_offset(s, a->pg),
3787 status, vsz, vsz, 0, fn);
3788 tcg_temp_free_ptr(status);
3789 }
3790 return true;
3791}
3792
3793#define DO_FP3(NAME, name) \
3a7be554 3794static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3795{ \
3796 static gen_helper_gvec_4_ptr * const fns[4] = { \
3797 NULL, gen_helper_sve_##name##_h, \
3798 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3799 }; \
3800 return do_zpzz_fp(s, a, fns[a->esz]); \
3801}
3802
3803DO_FP3(FADD_zpzz, fadd)
3804DO_FP3(FSUB_zpzz, fsub)
3805DO_FP3(FMUL_zpzz, fmul)
3806DO_FP3(FMIN_zpzz, fmin)
3807DO_FP3(FMAX_zpzz, fmax)
3808DO_FP3(FMINNM_zpzz, fminnum)
3809DO_FP3(FMAXNM_zpzz, fmaxnum)
3810DO_FP3(FABD, fabd)
3811DO_FP3(FSCALE, fscalbn)
3812DO_FP3(FDIV, fdiv)
3813DO_FP3(FMULX, fmulx)
3814
3815#undef DO_FP3
8092c6a3 3816
cc48affe
RH
3817typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3818 TCGv_i64, TCGv_ptr, TCGv_i32);
3819
3820static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3821 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3822{
3823 unsigned vsz = vec_full_reg_size(s);
3824 TCGv_ptr t_zd, t_zn, t_pg, status;
3825 TCGv_i32 desc;
3826
3827 t_zd = tcg_temp_new_ptr();
3828 t_zn = tcg_temp_new_ptr();
3829 t_pg = tcg_temp_new_ptr();
3830 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3831 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3832 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3833
3834 status = get_fpstatus_ptr(is_fp16);
3835 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3836 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3837
3838 tcg_temp_free_i32(desc);
3839 tcg_temp_free_ptr(status);
3840 tcg_temp_free_ptr(t_pg);
3841 tcg_temp_free_ptr(t_zn);
3842 tcg_temp_free_ptr(t_zd);
3843}
3844
3845static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3846 gen_helper_sve_fp2scalar *fn)
3847{
3848 TCGv_i64 temp = tcg_const_i64(imm);
3849 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3850 tcg_temp_free_i64(temp);
3851}
3852
3853#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3854static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3855{ \
3856 static gen_helper_sve_fp2scalar * const fns[3] = { \
3857 gen_helper_sve_##name##_h, \
3858 gen_helper_sve_##name##_s, \
3859 gen_helper_sve_##name##_d \
3860 }; \
3861 static uint64_t const val[3][2] = { \
3862 { float16_##const0, float16_##const1 }, \
3863 { float32_##const0, float32_##const1 }, \
3864 { float64_##const0, float64_##const1 }, \
3865 }; \
3866 if (a->esz == 0) { \
3867 return false; \
3868 } \
3869 if (sve_access_check(s)) { \
3870 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3871 } \
3872 return true; \
3873}
3874
3875#define float16_two make_float16(0x4000)
3876#define float32_two make_float32(0x40000000)
3877#define float64_two make_float64(0x4000000000000000ULL)
3878
3879DO_FP_IMM(FADD, fadds, half, one)
3880DO_FP_IMM(FSUB, fsubs, half, one)
3881DO_FP_IMM(FMUL, fmuls, half, two)
3882DO_FP_IMM(FSUBR, fsubrs, half, one)
3883DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3884DO_FP_IMM(FMINNM, fminnms, zero, one)
3885DO_FP_IMM(FMAX, fmaxs, zero, one)
3886DO_FP_IMM(FMIN, fmins, zero, one)
3887
3888#undef DO_FP_IMM
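
/*
 * Added note: for these insns a->imm is a single bit selecting one of
 * two architecturally defined constants, e.g. FADD adds 0.5 (imm == 0)
 * or 1.0 (imm == 1) and FMUL multiplies by 0.5 or 2.0.  The *_two
 * constants are defined locally just above, presumably because
 * softfloat does not already provide a "two" constant.
 */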
3889
abfdefd5
RH
3890static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3891 gen_helper_gvec_4_ptr *fn)
3892{
3893 if (fn == NULL) {
3894 return false;
3895 }
3896 if (sve_access_check(s)) {
3897 unsigned vsz = vec_full_reg_size(s);
3898 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3899 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3900 vec_full_reg_offset(s, a->rn),
3901 vec_full_reg_offset(s, a->rm),
3902 pred_full_reg_offset(s, a->pg),
3903 status, vsz, vsz, 0, fn);
3904 tcg_temp_free_ptr(status);
3905 }
3906 return true;
3907}
3908
3909#define DO_FPCMP(NAME, name) \
3a7be554 3910static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3911{ \
3912 static gen_helper_gvec_4_ptr * const fns[4] = { \
3913 NULL, gen_helper_sve_##name##_h, \
3914 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3915 }; \
3916 return do_fp_cmp(s, a, fns[a->esz]); \
3917}
3918
3919DO_FPCMP(FCMGE, fcmge)
3920DO_FPCMP(FCMGT, fcmgt)
3921DO_FPCMP(FCMEQ, fcmeq)
3922DO_FPCMP(FCMNE, fcmne)
3923DO_FPCMP(FCMUO, fcmuo)
3924DO_FPCMP(FACGE, facge)
3925DO_FPCMP(FACGT, facgt)
3926
3927#undef DO_FPCMP
3928
3a7be554 3929static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3930{
3931 static gen_helper_gvec_4_ptr * const fns[3] = {
3932 gen_helper_sve_fcadd_h,
3933 gen_helper_sve_fcadd_s,
3934 gen_helper_sve_fcadd_d
3935 };
3936
3937 if (a->esz == 0) {
3938 return false;
3939 }
3940 if (sve_access_check(s)) {
3941 unsigned vsz = vec_full_reg_size(s);
3942 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3943 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3944 vec_full_reg_offset(s, a->rn),
3945 vec_full_reg_offset(s, a->rm),
3946 pred_full_reg_offset(s, a->pg),
3947 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3948 tcg_temp_free_ptr(status);
3949 }
3950 return true;
3951}
3952
08975da9
RH
3953static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3954 gen_helper_gvec_5_ptr *fn)
6ceabaad 3955{
08975da9 3956 if (a->esz == 0) {
6ceabaad
RH
3957 return false;
3958 }
08975da9
RH
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
3961 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3962 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 vec_full_reg_offset(s, a->ra),
3966 pred_full_reg_offset(s, a->pg),
3967 status, vsz, vsz, 0, fn);
3968 tcg_temp_free_ptr(status);
6ceabaad 3969 }
6ceabaad
RH
3970 return true;
3971}
3972
3973#define DO_FMLA(NAME, name) \
3a7be554 3974static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad 3975{ \
08975da9 3976 static gen_helper_gvec_5_ptr * const fns[4] = { \
6ceabaad
RH
3977 NULL, gen_helper_sve_##name##_h, \
3978 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3979 }; \
3980 return do_fmla(s, a, fns[a->esz]); \
3981}
3982
3983DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3984DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3985DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3986DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3987
3988#undef DO_FMLA
3989
3a7be554 3990static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab 3991{
08975da9
RH
3992 static gen_helper_gvec_5_ptr * const fns[4] = {
3993 NULL,
05f48bab
RH
3994 gen_helper_sve_fcmla_zpzzz_h,
3995 gen_helper_sve_fcmla_zpzzz_s,
3996 gen_helper_sve_fcmla_zpzzz_d,
3997 };
3998
3999 if (a->esz == 0) {
4000 return false;
4001 }
4002 if (sve_access_check(s)) {
4003 unsigned vsz = vec_full_reg_size(s);
08975da9
RH
4004 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4005 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4006 vec_full_reg_offset(s, a->rn),
4007 vec_full_reg_offset(s, a->rm),
4008 vec_full_reg_offset(s, a->ra),
4009 pred_full_reg_offset(s, a->pg),
4010 status, vsz, vsz, a->rot, fns[a->esz]);
4011 tcg_temp_free_ptr(status);
05f48bab
RH
4012 }
4013 return true;
4014}
4015
3a7be554 4016static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
4017{
4018 static gen_helper_gvec_3_ptr * const fns[2] = {
4019 gen_helper_gvec_fcmlah_idx,
4020 gen_helper_gvec_fcmlas_idx,
4021 };
4022
4023 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4024 tcg_debug_assert(a->rd == a->ra);
4025 if (sve_access_check(s)) {
4026 unsigned vsz = vec_full_reg_size(s);
4027 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
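        /* The gvec data argument packs the rotation into its low two bits
         * with the element index above: index * 4 + rot.
         */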
4028 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4029 vec_full_reg_offset(s, a->rn),
4030 vec_full_reg_offset(s, a->rm),
4031 status, vsz, vsz,
4032 a->index * 4 + a->rot,
4033 fns[a->esz - 1]);
4034 tcg_temp_free_ptr(status);
4035 }
4036 return true;
4037}
4038
8092c6a3
RH
4039/*
4040 *** SVE Floating Point Unary Operations Predicated Group
4041 */
4042
4043static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4044 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4045{
4046 if (sve_access_check(s)) {
4047 unsigned vsz = vec_full_reg_size(s);
4048 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4049 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4050 vec_full_reg_offset(s, rn),
4051 pred_full_reg_offset(s, pg),
4052 status, vsz, vsz, 0, fn);
4053 tcg_temp_free_ptr(status);
4054 }
4055 return true;
4056}
4057
3a7be554 4058static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4059{
e4ab5124 4060 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4061}
4062
3a7be554 4063static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4064{
4065 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4066}
4067
3a7be554 4068static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4069{
e4ab5124 4070 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4071}
4072
3a7be554 4073static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4074{
4075 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4076}
4077
3a7be554 4078static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4079{
4080 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4081}
4082
3a7be554 4083static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4084{
4085 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4086}
4087
3a7be554 4088static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4089{
4090 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4091}
4092
3a7be554 4093static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4094{
4095 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4096}
4097
3a7be554 4098static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4099{
4100 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4101}
4102
3a7be554 4103static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4104{
4105 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4106}
4107
3a7be554 4108static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4109{
4110 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4111}
4112
3a7be554 4113static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4114{
4115 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4116}
4117
3a7be554 4118static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4119{
4120 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4121}
4122
3a7be554 4123static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4124{
4125 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4126}
4127
3a7be554 4128static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4129{
4130 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4131}
4132
3a7be554 4133static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4134{
4135 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4136}
4137
3a7be554 4138static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4139{
4140 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4141}
4142
3a7be554 4143static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4144{
4145 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4146}
4147
3a7be554 4148static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4149{
4150 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4151}
4152
3a7be554 4153static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4154{
4155 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4156}
4157
cda3c753
RH
4158static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4159 gen_helper_sve_frint_h,
4160 gen_helper_sve_frint_s,
4161 gen_helper_sve_frint_d
4162};
4163
3a7be554 4164static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4165{
4166 if (a->esz == 0) {
4167 return false;
4168 }
4169 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4170 frint_fns[a->esz - 1]);
4171}
4172
3a7be554 4173static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4174{
4175 static gen_helper_gvec_3_ptr * const fns[3] = {
4176 gen_helper_sve_frintx_h,
4177 gen_helper_sve_frintx_s,
4178 gen_helper_sve_frintx_d
4179 };
4180 if (a->esz == 0) {
4181 return false;
4182 }
4183 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4184}
4185
4186static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4187{
4188 if (a->esz == 0) {
4189 return false;
4190 }
4191 if (sve_access_check(s)) {
4192 unsigned vsz = vec_full_reg_size(s);
4193 TCGv_i32 tmode = tcg_const_i32(mode);
4194 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4195
4196 gen_helper_set_rmode(tmode, tmode, status);
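        /* set_rmode installs the requested rounding mode and hands back the
         * previous mode in tmode, so the identical call below restores it.
         */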
4197
4198 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4199 vec_full_reg_offset(s, a->rn),
4200 pred_full_reg_offset(s, a->pg),
4201 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4202
4203 gen_helper_set_rmode(tmode, tmode, status);
4204 tcg_temp_free_i32(tmode);
4205 tcg_temp_free_ptr(status);
4206 }
4207 return true;
4208}
4209
3a7be554 4210static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4211{
4212 return do_frint_mode(s, a, float_round_nearest_even);
4213}
4214
3a7be554 4215static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4216{
4217 return do_frint_mode(s, a, float_round_up);
4218}
4219
3a7be554 4220static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4221{
4222 return do_frint_mode(s, a, float_round_down);
4223}
4224
3a7be554 4225static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4226{
4227 return do_frint_mode(s, a, float_round_to_zero);
4228}
4229
3a7be554 4230static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4231{
4232 return do_frint_mode(s, a, float_round_ties_away);
4233}
4234
3a7be554 4235static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4236{
4237 static gen_helper_gvec_3_ptr * const fns[3] = {
4238 gen_helper_sve_frecpx_h,
4239 gen_helper_sve_frecpx_s,
4240 gen_helper_sve_frecpx_d
4241 };
4242 if (a->esz == 0) {
4243 return false;
4244 }
4245 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4246}
4247
3a7be554 4248static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4249{
4250 static gen_helper_gvec_3_ptr * const fns[3] = {
4251 gen_helper_sve_fsqrt_h,
4252 gen_helper_sve_fsqrt_s,
4253 gen_helper_sve_fsqrt_d
4254 };
4255 if (a->esz == 0) {
4256 return false;
4257 }
4258 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4259}
4260
3a7be554 4261static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4262{
4263 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4264}
4265
3a7be554 4266static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4267{
4268 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4269}
4270
3a7be554 4271static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4272{
4273 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4274}
4275
3a7be554 4276static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4277{
4278 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4279}
4280
3a7be554 4281static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4282{
4283 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4284}
4285
3a7be554 4286static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4287{
4288 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4289}
4290
3a7be554 4291static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4292{
4293 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4294}
4295
3a7be554 4296static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4297{
4298 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4299}
4300
3a7be554 4301static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4302{
4303 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4304}
4305
3a7be554 4306static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4307{
4308 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4309}
4310
3a7be554 4311static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4312{
4313 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4314}
4315
3a7be554 4316static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4317{
4318 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4319}
4320
3a7be554 4321static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4322{
4323 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4324}
4325
3a7be554 4326static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4327{
4328 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4329}
4330
d1822297
RH
4331/*
4332 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4333 */
4334
4335/* Subroutine loading a vector register at VOFS of LEN bytes.
4336 * The load should begin at the address Rn + IMM.
4337 */
4338
19f2acc9 4339static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
d1822297 4340{
19f2acc9
RH
4341 int len_align = QEMU_ALIGN_DOWN(len, 8);
4342 int len_remain = len % 8;
4343 int nparts = len / 8 + ctpop8(len_remain);
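    /* len_remain is 0, 2, 4 or 6; ctpop8 counts one part per power-of-two
     * piece of the tail, e.g. a 6-byte tail takes a 4-byte plus a 2-byte access.
     */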
d1822297 4344 int midx = get_mem_index(s);
b2aa8879 4345 TCGv_i64 dirty_addr, clean_addr, t0, t1;
d1822297 4346
b2aa8879
RH
4347 dirty_addr = tcg_temp_new_i64();
4348 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4349 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4350 tcg_temp_free_i64(dirty_addr);
d1822297 4351
b2aa8879
RH
4352 /*
4353 * Note that unpredicated load/store of vector/predicate registers
d1822297 4354 * are defined as a stream of bytes, which equates to little-endian
b2aa8879 4355 * operations on larger quantities.
d1822297
RH
4356 * Attempt to keep code expansion to a minimum by limiting the
4357 * amount of unrolling done.
4358 */
4359 if (nparts <= 4) {
4360 int i;
4361
b2aa8879 4362 t0 = tcg_temp_new_i64();
d1822297 4363 for (i = 0; i < len_align; i += 8) {
b2aa8879 4364 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
d1822297 4365 tcg_gen_st_i64(t0, cpu_env, vofs + i);
b2aa8879 4366 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4367 }
b2aa8879 4368 tcg_temp_free_i64(t0);
d1822297
RH
4369 } else {
4370 TCGLabel *loop = gen_new_label();
4371 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4372
b2aa8879
RH
4373 /* Copy the clean address into a local temp, live across the loop. */
4374 t0 = clean_addr;
4b4dc975 4375 clean_addr = new_tmp_a64_local(s);
b2aa8879 4376 tcg_gen_mov_i64(clean_addr, t0);
d1822297 4377
b2aa8879 4378 gen_set_label(loop);
d1822297 4379
b2aa8879
RH
4380 t0 = tcg_temp_new_i64();
4381 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4382 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
d1822297 4383
b2aa8879 4384 tp = tcg_temp_new_ptr();
d1822297
RH
4385 tcg_gen_add_ptr(tp, cpu_env, i);
4386 tcg_gen_addi_ptr(i, i, 8);
4387 tcg_gen_st_i64(t0, tp, vofs);
4388 tcg_temp_free_ptr(tp);
b2aa8879 4389 tcg_temp_free_i64(t0);
d1822297
RH
4390
4391 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4392 tcg_temp_free_ptr(i);
4393 }
4394
b2aa8879
RH
4395 /*
4396 * Predicate register loads can be any multiple of 2.
d1822297
RH
4397 * Note that we still store the entire 64-bit unit into cpu_env.
4398 */
4399 if (len_remain) {
b2aa8879 4400 t0 = tcg_temp_new_i64();
d1822297
RH
4401 switch (len_remain) {
4402 case 2:
4403 case 4:
4404 case 8:
b2aa8879
RH
4405 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4406 MO_LE | ctz32(len_remain));
d1822297
RH
4407 break;
4408
4409 case 6:
4410 t1 = tcg_temp_new_i64();
b2aa8879
RH
4411 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4412 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4413 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
d1822297
RH
4414 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4415 tcg_temp_free_i64(t1);
4416 break;
4417
4418 default:
4419 g_assert_not_reached();
4420 }
4421 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
b2aa8879 4422 tcg_temp_free_i64(t0);
d1822297 4423 }
d1822297
RH
4424}
4425
5047c204 4426/* Similarly for stores. */
19f2acc9 4427static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
5047c204 4428{
19f2acc9
RH
4429 int len_align = QEMU_ALIGN_DOWN(len, 8);
4430 int len_remain = len % 8;
4431 int nparts = len / 8 + ctpop8(len_remain);
5047c204 4432 int midx = get_mem_index(s);
bba87d0a 4433 TCGv_i64 dirty_addr, clean_addr, t0;
5047c204 4434
bba87d0a
RH
4435 dirty_addr = tcg_temp_new_i64();
4436 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4437 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4438 tcg_temp_free_i64(dirty_addr);
5047c204
RH
4439
4440 /* Note that unpredicated load/store of vector/predicate registers
4441 * are defined as a stream of bytes, which equates to little-endian
4442 * operations on larger quantities. There is no nice way to force
4443 * a little-endian store for aarch64_be-linux-user out of line.
4444 *
4445 * Attempt to keep code expansion to a minimum by limiting the
4446 * amount of unrolling done.
4447 */
4448 if (nparts <= 4) {
4449 int i;
4450
bba87d0a 4451 t0 = tcg_temp_new_i64();
5047c204
RH
4452 for (i = 0; i < len_align; i += 8) {
4453 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
bba87d0a
RH
4454 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4455 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
5047c204 4456 }
bba87d0a 4457 tcg_temp_free_i64(t0);
5047c204
RH
4458 } else {
4459 TCGLabel *loop = gen_new_label();
bba87d0a 4460 TCGv_ptr tp, i = tcg_const_local_ptr(0);
5047c204 4461
bba87d0a
RH
4462 /* Copy the clean address into a local temp, live across the loop. */
4463 t0 = clean_addr;
4b4dc975 4464 clean_addr = new_tmp_a64_local(s);
bba87d0a 4465 tcg_gen_mov_i64(clean_addr, t0);
5047c204 4466
bba87d0a 4467 gen_set_label(loop);
5047c204 4468
bba87d0a
RH
4469 t0 = tcg_temp_new_i64();
4470 tp = tcg_temp_new_ptr();
4471 tcg_gen_add_ptr(tp, cpu_env, i);
4472 tcg_gen_ld_i64(t0, tp, vofs);
5047c204 4473 tcg_gen_addi_ptr(i, i, 8);
bba87d0a
RH
4474 tcg_temp_free_ptr(tp);
4475
4476 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4477 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4478 tcg_temp_free_i64(t0);
5047c204
RH
4479
4480 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4481 tcg_temp_free_ptr(i);
4482 }
4483
4484 /* Predicate register stores can be any multiple of 2. */
4485 if (len_remain) {
bba87d0a 4486 t0 = tcg_temp_new_i64();
5047c204 4487 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
5047c204
RH
4488
4489 switch (len_remain) {
4490 case 2:
4491 case 4:
4492 case 8:
bba87d0a
RH
4493 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4494 MO_LE | ctz32(len_remain));
5047c204
RH
4495 break;
4496
4497 case 6:
bba87d0a
RH
4498 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4499 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
5047c204 4500 tcg_gen_shri_i64(t0, t0, 32);
bba87d0a 4501 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
5047c204
RH
4502 break;
4503
4504 default:
4505 g_assert_not_reached();
4506 }
bba87d0a 4507 tcg_temp_free_i64(t0);
5047c204 4508 }
5047c204
RH
4509}
4510
3a7be554 4511static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4512{
4513 if (sve_access_check(s)) {
4514 int size = vec_full_reg_size(s);
4515 int off = vec_full_reg_offset(s, a->rd);
4516 do_ldr(s, off, size, a->rn, a->imm * size);
4517 }
4518 return true;
4519}
4520
3a7be554 4521static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4522{
4523 if (sve_access_check(s)) {
4524 int size = pred_full_reg_size(s);
4525 int off = pred_full_reg_offset(s, a->rd);
4526 do_ldr(s, off, size, a->rn, a->imm * size);
4527 }
4528 return true;
4529}
c4e7c493 4530
3a7be554 4531static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4532{
4533 if (sve_access_check(s)) {
4534 int size = vec_full_reg_size(s);
4535 int off = vec_full_reg_offset(s, a->rd);
4536 do_str(s, off, size, a->rn, a->imm * size);
4537 }
4538 return true;
4539}
4540
3a7be554 4541static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4542{
4543 if (sve_access_check(s)) {
4544 int size = pred_full_reg_size(s);
4545 int off = pred_full_reg_offset(s, a->rd);
4546 do_str(s, off, size, a->rn, a->imm * size);
4547 }
4548 return true;
4549}
4550
c4e7c493
RH
4551/*
4552 *** SVE Memory - Contiguous Load Group
4553 */
4554
4555/* The memory mode of the dtype. */
14776ab5 4556static const MemOp dtype_mop[16] = {
c4e7c493
RH
4557 MO_UB, MO_UB, MO_UB, MO_UB,
4558 MO_SL, MO_UW, MO_UW, MO_UW,
4559 MO_SW, MO_SW, MO_UL, MO_UL,
4560 MO_SB, MO_SB, MO_SB, MO_Q
4561};
4562
4563#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4564
4565/* The vector element size of dtype. */
4566static const uint8_t dtype_esz[16] = {
4567 0, 1, 2, 3,
4568 3, 1, 2, 3,
4569 3, 2, 2, 3,
4570 3, 2, 1, 3
4571};
4572
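/*
 * Minimal standalone sketch (illustrative only, not used by the translator):
 * print the memory access size and vector lane size implied by each dtype,
 * with the msz column transcribed from dtype_mop[] above on the assumption
 * that MO_SIZE is the usual log2-size field of a MemOp.
 */
#if 0
#include <stdio.h>

int main(void)
{
    static const int msz[16] = { 0, 0, 0, 0,  2, 1, 1, 1,
                                 1, 1, 2, 2,  0, 0, 0, 3 };
    static const int esz[16] = { 0, 1, 2, 3,  3, 1, 2, 3,
                                 3, 2, 2, 3,  3, 2, 1, 3 };

    for (int dtype = 0; dtype < 16; dtype++) {
        printf("dtype %2d: %d-byte memory elements -> %d-byte vector lanes\n",
               dtype, 1 << msz[dtype], 1 << esz[dtype]);
    }
    return 0;
}
#endif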
4573static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
206adacf
RH
4574 int dtype, uint32_t mte_n, bool is_write,
4575 gen_helper_gvec_mem *fn)
c4e7c493
RH
4576{
4577 unsigned vsz = vec_full_reg_size(s);
4578 TCGv_ptr t_pg;
500d0484 4579 TCGv_i32 t_desc;
206adacf 4580 int desc = 0;
c4e7c493 4581
206adacf
RH
4582 /*
4583 * For e.g. LD4, there are not enough arguments to pass all 4
c4e7c493
RH
4584 * registers as pointers, so encode the regno into the data field.
4585 * For consistency, do this even for LD1.
4586 */
9473d0ec 4587 if (s->mte_active[0]) {
206adacf
RH
4588 int msz = dtype_msz(dtype);
4589
4590 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4591 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4592 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4593 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4594 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4595 desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4596 desc <<= SVE_MTEDESC_SHIFT;
9473d0ec
RH
4597 } else {
4598 addr = clean_data_tbi(s, addr);
206adacf 4599 }
9473d0ec 4600
206adacf 4601 desc = simd_desc(vsz, vsz, zt | desc);
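    /* Within the data field, zt occupies the low bits and any MTE
     * descriptor sits at SVE_MTEDESC_SHIFT and above.
     */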
500d0484 4602 t_desc = tcg_const_i32(desc);
c4e7c493
RH
4603 t_pg = tcg_temp_new_ptr();
4604
4605 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
500d0484 4606 fn(cpu_env, t_pg, addr, t_desc);
c4e7c493
RH
4607
4608 tcg_temp_free_ptr(t_pg);
500d0484 4609 tcg_temp_free_i32(t_desc);
c4e7c493
RH
4610}
4611
4612static void do_ld_zpa(DisasContext *s, int zt, int pg,
4613 TCGv_i64 addr, int dtype, int nreg)
4614{
206adacf
RH
4615 static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4616 { /* mte inactive, little-endian */
4617 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
7d0a57a2 4618 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
206adacf
RH
4619 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4620 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4621 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4622
4623 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4624 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4625 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4626 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4627 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4628
4629 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4630 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4631 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4632 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4633 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4634
4635 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4636 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4637 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4638 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4639 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4640
4641 /* mte inactive, big-endian */
4642 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4643 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4644 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4645 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4646 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4647
4648 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4649 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4650 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4651 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4652 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4653
4654 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4655 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4656 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4657 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4658 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4659
4660 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4661 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4662 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4663 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4664 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4665
4666 { /* mte active, little-endian */
4667 { { gen_helper_sve_ld1bb_r_mte,
4668 gen_helper_sve_ld2bb_r_mte,
4669 gen_helper_sve_ld3bb_r_mte,
4670 gen_helper_sve_ld4bb_r_mte },
4671 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4672 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4673 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4674
4675 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4676 { gen_helper_sve_ld1hh_le_r_mte,
4677 gen_helper_sve_ld2hh_le_r_mte,
4678 gen_helper_sve_ld3hh_le_r_mte,
4679 gen_helper_sve_ld4hh_le_r_mte },
4680 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4681 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4682
4683 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4684 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4685 { gen_helper_sve_ld1ss_le_r_mte,
4686 gen_helper_sve_ld2ss_le_r_mte,
4687 gen_helper_sve_ld3ss_le_r_mte,
4688 gen_helper_sve_ld4ss_le_r_mte },
4689 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4690
4691 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4692 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4693 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4694 { gen_helper_sve_ld1dd_le_r_mte,
4695 gen_helper_sve_ld2dd_le_r_mte,
4696 gen_helper_sve_ld3dd_le_r_mte,
4697 gen_helper_sve_ld4dd_le_r_mte } },
4698
4699 /* mte active, big-endian */
4700 { { gen_helper_sve_ld1bb_r_mte,
4701 gen_helper_sve_ld2bb_r_mte,
4702 gen_helper_sve_ld3bb_r_mte,
4703 gen_helper_sve_ld4bb_r_mte },
4704 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4705 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4706 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4707
4708 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4709 { gen_helper_sve_ld1hh_be_r_mte,
4710 gen_helper_sve_ld2hh_be_r_mte,
4711 gen_helper_sve_ld3hh_be_r_mte,
4712 gen_helper_sve_ld4hh_be_r_mte },
4713 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4714 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4715
4716 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4717 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4718 { gen_helper_sve_ld1ss_be_r_mte,
4719 gen_helper_sve_ld2ss_be_r_mte,
4720 gen_helper_sve_ld3ss_be_r_mte,
4721 gen_helper_sve_ld4ss_be_r_mte },
4722 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4723
4724 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4725 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4726 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4727 { gen_helper_sve_ld1dd_be_r_mte,
4728 gen_helper_sve_ld2dd_be_r_mte,
4729 gen_helper_sve_ld3dd_be_r_mte,
4730 gen_helper_sve_ld4dd_be_r_mte } } },
c4e7c493 4731 };
206adacf
RH
4732 gen_helper_gvec_mem *fn
4733 = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
c4e7c493 4734
206adacf
RH
4735 /*
4736 * While there are holes in the table, they are not
c4e7c493
RH
4737 * accessible via the instruction encoding.
4738 */
4739 assert(fn != NULL);
206adacf 4740 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
c4e7c493
RH
4741}
4742
3a7be554 4743static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4744{
4745 if (a->rm == 31) {
4746 return false;
4747 }
4748 if (sve_access_check(s)) {
4749 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4750 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4751 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4752 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4753 }
4754 return true;
4755}
4756
3a7be554 4757static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4758{
4759 if (sve_access_check(s)) {
4760 int vsz = vec_full_reg_size(s);
4761 int elements = vsz >> dtype_esz[a->dtype];
4762 TCGv_i64 addr = new_tmp_a64(s);
4763
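        /* The immediate steps in whole-transfer units.  Worked example:
         * with a 256-bit vector, LD4H has elements = 16 and nreg = 3,
         * so each step of a->imm advances 16 * 4 * 2 = 128 bytes.
         */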
4764 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4765 (a->imm * elements * (a->nreg + 1))
4766 << dtype_msz(a->dtype));
4767 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4768 }
4769 return true;
4770}
e2654d75 4771
3a7be554 4772static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
e2654d75 4773{
aa13f7c3
RH
4774 static gen_helper_gvec_mem * const fns[2][2][16] = {
4775 { /* mte inactive, little-endian */
4776 { gen_helper_sve_ldff1bb_r,
4777 gen_helper_sve_ldff1bhu_r,
4778 gen_helper_sve_ldff1bsu_r,
4779 gen_helper_sve_ldff1bdu_r,
4780
4781 gen_helper_sve_ldff1sds_le_r,
4782 gen_helper_sve_ldff1hh_le_r,
4783 gen_helper_sve_ldff1hsu_le_r,
4784 gen_helper_sve_ldff1hdu_le_r,
4785
4786 gen_helper_sve_ldff1hds_le_r,
4787 gen_helper_sve_ldff1hss_le_r,
4788 gen_helper_sve_ldff1ss_le_r,
4789 gen_helper_sve_ldff1sdu_le_r,
4790
4791 gen_helper_sve_ldff1bds_r,
4792 gen_helper_sve_ldff1bss_r,
4793 gen_helper_sve_ldff1bhs_r,
4794 gen_helper_sve_ldff1dd_le_r },
4795
4796 /* mte inactive, big-endian */
4797 { gen_helper_sve_ldff1bb_r,
4798 gen_helper_sve_ldff1bhu_r,
4799 gen_helper_sve_ldff1bsu_r,
4800 gen_helper_sve_ldff1bdu_r,
4801
4802 gen_helper_sve_ldff1sds_be_r,
4803 gen_helper_sve_ldff1hh_be_r,
4804 gen_helper_sve_ldff1hsu_be_r,
4805 gen_helper_sve_ldff1hdu_be_r,
4806
4807 gen_helper_sve_ldff1hds_be_r,
4808 gen_helper_sve_ldff1hss_be_r,
4809 gen_helper_sve_ldff1ss_be_r,
4810 gen_helper_sve_ldff1sdu_be_r,
4811
4812 gen_helper_sve_ldff1bds_r,
4813 gen_helper_sve_ldff1bss_r,
4814 gen_helper_sve_ldff1bhs_r,
4815 gen_helper_sve_ldff1dd_be_r } },
4816
4817 { /* mte active, little-endian */
4818 { gen_helper_sve_ldff1bb_r_mte,
4819 gen_helper_sve_ldff1bhu_r_mte,
4820 gen_helper_sve_ldff1bsu_r_mte,
4821 gen_helper_sve_ldff1bdu_r_mte,
4822
4823 gen_helper_sve_ldff1sds_le_r_mte,
4824 gen_helper_sve_ldff1hh_le_r_mte,
4825 gen_helper_sve_ldff1hsu_le_r_mte,
4826 gen_helper_sve_ldff1hdu_le_r_mte,
4827
4828 gen_helper_sve_ldff1hds_le_r_mte,
4829 gen_helper_sve_ldff1hss_le_r_mte,
4830 gen_helper_sve_ldff1ss_le_r_mte,
4831 gen_helper_sve_ldff1sdu_le_r_mte,
4832
4833 gen_helper_sve_ldff1bds_r_mte,
4834 gen_helper_sve_ldff1bss_r_mte,
4835 gen_helper_sve_ldff1bhs_r_mte,
4836 gen_helper_sve_ldff1dd_le_r_mte },
4837
4838 /* mte active, big-endian */
4839 { gen_helper_sve_ldff1bb_r_mte,
4840 gen_helper_sve_ldff1bhu_r_mte,
4841 gen_helper_sve_ldff1bsu_r_mte,
4842 gen_helper_sve_ldff1bdu_r_mte,
4843
4844 gen_helper_sve_ldff1sds_be_r_mte,
4845 gen_helper_sve_ldff1hh_be_r_mte,
4846 gen_helper_sve_ldff1hsu_be_r_mte,
4847 gen_helper_sve_ldff1hdu_be_r_mte,
4848
4849 gen_helper_sve_ldff1hds_be_r_mte,
4850 gen_helper_sve_ldff1hss_be_r_mte,
4851 gen_helper_sve_ldff1ss_be_r_mte,
4852 gen_helper_sve_ldff1sdu_be_r_mte,
4853
4854 gen_helper_sve_ldff1bds_r_mte,
4855 gen_helper_sve_ldff1bss_r_mte,
4856 gen_helper_sve_ldff1bhs_r_mte,
4857 gen_helper_sve_ldff1dd_be_r_mte } },
e2654d75
RH
4858 };
4859
4860 if (sve_access_check(s)) {
4861 TCGv_i64 addr = new_tmp_a64(s);
4862 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4863 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
aa13f7c3
RH
4864 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4865 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4866 }
4867 return true;
4868}
4869
3a7be554 4870static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
e2654d75 4871{
aa13f7c3
RH
4872 static gen_helper_gvec_mem * const fns[2][2][16] = {
4873 { /* mte inactive, little-endian */
4874 { gen_helper_sve_ldnf1bb_r,
4875 gen_helper_sve_ldnf1bhu_r,
4876 gen_helper_sve_ldnf1bsu_r,
4877 gen_helper_sve_ldnf1bdu_r,
4878
4879 gen_helper_sve_ldnf1sds_le_r,
4880 gen_helper_sve_ldnf1hh_le_r,
4881 gen_helper_sve_ldnf1hsu_le_r,
4882 gen_helper_sve_ldnf1hdu_le_r,
4883
4884 gen_helper_sve_ldnf1hds_le_r,
4885 gen_helper_sve_ldnf1hss_le_r,
4886 gen_helper_sve_ldnf1ss_le_r,
4887 gen_helper_sve_ldnf1sdu_le_r,
4888
4889 gen_helper_sve_ldnf1bds_r,
4890 gen_helper_sve_ldnf1bss_r,
4891 gen_helper_sve_ldnf1bhs_r,
4892 gen_helper_sve_ldnf1dd_le_r },
4893
4894 /* mte inactive, big-endian */
4895 { gen_helper_sve_ldnf1bb_r,
4896 gen_helper_sve_ldnf1bhu_r,
4897 gen_helper_sve_ldnf1bsu_r,
4898 gen_helper_sve_ldnf1bdu_r,
4899
4900 gen_helper_sve_ldnf1sds_be_r,
4901 gen_helper_sve_ldnf1hh_be_r,
4902 gen_helper_sve_ldnf1hsu_be_r,
4903 gen_helper_sve_ldnf1hdu_be_r,
4904
4905 gen_helper_sve_ldnf1hds_be_r,
4906 gen_helper_sve_ldnf1hss_be_r,
4907 gen_helper_sve_ldnf1ss_be_r,
4908 gen_helper_sve_ldnf1sdu_be_r,
4909
4910 gen_helper_sve_ldnf1bds_r,
4911 gen_helper_sve_ldnf1bss_r,
4912 gen_helper_sve_ldnf1bhs_r,
4913 gen_helper_sve_ldnf1dd_be_r } },
4914
4915 { /* mte active, little-endian */
4916 { gen_helper_sve_ldnf1bb_r_mte,
4917 gen_helper_sve_ldnf1bhu_r_mte,
4918 gen_helper_sve_ldnf1bsu_r_mte,
4919 gen_helper_sve_ldnf1bdu_r_mte,
4920
4921 gen_helper_sve_ldnf1sds_le_r_mte,
4922 gen_helper_sve_ldnf1hh_le_r_mte,
4923 gen_helper_sve_ldnf1hsu_le_r_mte,
4924 gen_helper_sve_ldnf1hdu_le_r_mte,
4925
4926 gen_helper_sve_ldnf1hds_le_r_mte,
4927 gen_helper_sve_ldnf1hss_le_r_mte,
4928 gen_helper_sve_ldnf1ss_le_r_mte,
4929 gen_helper_sve_ldnf1sdu_le_r_mte,
4930
4931 gen_helper_sve_ldnf1bds_r_mte,
4932 gen_helper_sve_ldnf1bss_r_mte,
4933 gen_helper_sve_ldnf1bhs_r_mte,
4934 gen_helper_sve_ldnf1dd_le_r_mte },
4935
4936 /* mte active, big-endian */
4937 { gen_helper_sve_ldnf1bb_r_mte,
4938 gen_helper_sve_ldnf1bhu_r_mte,
4939 gen_helper_sve_ldnf1bsu_r_mte,
4940 gen_helper_sve_ldnf1bdu_r_mte,
4941
4942 gen_helper_sve_ldnf1sds_be_r_mte,
4943 gen_helper_sve_ldnf1hh_be_r_mte,
4944 gen_helper_sve_ldnf1hsu_be_r_mte,
4945 gen_helper_sve_ldnf1hdu_be_r_mte,
4946
4947 gen_helper_sve_ldnf1hds_be_r_mte,
4948 gen_helper_sve_ldnf1hss_be_r_mte,
4949 gen_helper_sve_ldnf1ss_be_r_mte,
4950 gen_helper_sve_ldnf1sdu_be_r_mte,
4951
4952 gen_helper_sve_ldnf1bds_r_mte,
4953 gen_helper_sve_ldnf1bss_r_mte,
4954 gen_helper_sve_ldnf1bhs_r_mte,
4955 gen_helper_sve_ldnf1dd_be_r_mte } },
e2654d75
RH
4956 };
4957
4958 if (sve_access_check(s)) {
4959 int vsz = vec_full_reg_size(s);
4960 int elements = vsz >> dtype_esz[a->dtype];
4961 int off = (a->imm * elements) << dtype_msz(a->dtype);
4962 TCGv_i64 addr = new_tmp_a64(s);
4963
4964 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
aa13f7c3
RH
4965 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4966 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
e2654d75
RH
4967 }
4968 return true;
4969}
1a039c7e 4970
05abe304
RH
4971static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4972{
7d0a57a2
RH
4973 static gen_helper_gvec_mem * const fns[2][4] = {
4974 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4975 gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4976 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4977 gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
05abe304
RH
4978 };
4979 unsigned vsz = vec_full_reg_size(s);
4980 TCGv_ptr t_pg;
500d0484
RH
4981 TCGv_i32 t_desc;
4982 int desc, poff;
05abe304
RH
4983
4984 /* Load the first quadword using the normal predicated load helpers. */
ba080b86 4985 desc = simd_desc(16, 16, zt);
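    /* oprsz = maxsz = 16 restricts the helper to the first quadword. */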
500d0484 4986 t_desc = tcg_const_i32(desc);
2a99ab2b
RH
4987
4988 poff = pred_full_reg_offset(s, pg);
4989 if (vsz > 16) {
4990 /*
4991 * Zero-extend the first 16 bits of the predicate into a temporary.
4992 * This avoids triggering an assert making sure we don't have bits
4993 * set within a predicate beyond VQ, but we have lowered VQ to 1
4994 * for this load operation.
4995 */
4996 TCGv_i64 tmp = tcg_temp_new_i64();
4997#ifdef HOST_WORDS_BIGENDIAN
4998 poff += 6;
4999#endif
5000 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5001
5002 poff = offsetof(CPUARMState, vfp.preg_tmp);
5003 tcg_gen_st_i64(tmp, cpu_env, poff);
5004 tcg_temp_free_i64(tmp);
5005 }
5006
05abe304 5007 t_pg = tcg_temp_new_ptr();
2a99ab2b 5008 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
05abe304 5009
500d0484 5010 fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
05abe304
RH
5011
5012 tcg_temp_free_ptr(t_pg);
500d0484 5013 tcg_temp_free_i32(t_desc);
05abe304
RH
5014
5015 /* Replicate that first quadword. */
5016 if (vsz > 16) {
5017 unsigned dofs = vec_full_reg_offset(s, zt);
5018 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
5019 }
5020}
5021
3a7be554 5022static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
5023{
5024 if (a->rm == 31) {
5025 return false;
5026 }
5027 if (sve_access_check(s)) {
5028 int msz = dtype_msz(a->dtype);
5029 TCGv_i64 addr = new_tmp_a64(s);
5030 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5031 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5032 do_ldrq(s, a->rd, a->pg, addr, msz);
5033 }
5034 return true;
5035}
5036
3a7be554 5037static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
5038{
5039 if (sve_access_check(s)) {
5040 TCGv_i64 addr = new_tmp_a64(s);
5041 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5042 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
5043 }
5044 return true;
5045}
5046
68459864 5047/* Load and broadcast element. */
3a7be554 5048static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
68459864 5049{
68459864
RH
5050 unsigned vsz = vec_full_reg_size(s);
5051 unsigned psz = pred_full_reg_size(s);
5052 unsigned esz = dtype_esz[a->dtype];
d0e372b0 5053 unsigned msz = dtype_msz(a->dtype);
c0ed9166 5054 TCGLabel *over;
4ac430e1 5055 TCGv_i64 temp, clean_addr;
68459864 5056
c0ed9166
RH
5057 if (!sve_access_check(s)) {
5058 return true;
5059 }
5060
5061 over = gen_new_label();
5062
68459864
RH
5063 /* If the guarding predicate has no bits set, no load occurs. */
5064 if (psz <= 8) {
5065 /* Reduce the pred_esz_masks value simply to reduce the
5066 * size of the code generated here.
5067 */
5068 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5069 temp = tcg_temp_new_i64();
5070 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5071 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5072 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5073 tcg_temp_free_i64(temp);
5074 } else {
5075 TCGv_i32 t32 = tcg_temp_new_i32();
5076 find_last_active(s, t32, esz, a->pg);
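        /* find_last_active leaves -1 in t32 when no element is active,
         * in which case the load is skipped.
         */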
5077 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5078 tcg_temp_free_i32(t32);
5079 }
5080
5081 /* Load the data. */
5082 temp = tcg_temp_new_i64();
d0e372b0 5083 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4ac430e1
RH
5084 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5085
5086 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
68459864
RH
5087 s->be_data | dtype_mop[a->dtype]);
5088
5089 /* Broadcast to *all* elements. */
5090 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5091 vsz, vsz, temp);
5092 tcg_temp_free_i64(temp);
5093
5094 /* Zero the inactive elements. */
5095 gen_set_label(over);
5096 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
5097 return true;
5098}
5099
1a039c7e
RH
5100static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5101 int msz, int esz, int nreg)
5102{
71b9f394
RH
5103 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5104 { { { gen_helper_sve_st1bb_r,
5105 gen_helper_sve_st1bh_r,
5106 gen_helper_sve_st1bs_r,
5107 gen_helper_sve_st1bd_r },
5108 { NULL,
5109 gen_helper_sve_st1hh_le_r,
5110 gen_helper_sve_st1hs_le_r,
5111 gen_helper_sve_st1hd_le_r },
5112 { NULL, NULL,
5113 gen_helper_sve_st1ss_le_r,
5114 gen_helper_sve_st1sd_le_r },
5115 { NULL, NULL, NULL,
5116 gen_helper_sve_st1dd_le_r } },
5117 { { gen_helper_sve_st1bb_r,
5118 gen_helper_sve_st1bh_r,
5119 gen_helper_sve_st1bs_r,
5120 gen_helper_sve_st1bd_r },
5121 { NULL,
5122 gen_helper_sve_st1hh_be_r,
5123 gen_helper_sve_st1hs_be_r,
5124 gen_helper_sve_st1hd_be_r },
5125 { NULL, NULL,
5126 gen_helper_sve_st1ss_be_r,
5127 gen_helper_sve_st1sd_be_r },
5128 { NULL, NULL, NULL,
5129 gen_helper_sve_st1dd_be_r } } },
5130
5131 { { { gen_helper_sve_st1bb_r_mte,
5132 gen_helper_sve_st1bh_r_mte,
5133 gen_helper_sve_st1bs_r_mte,
5134 gen_helper_sve_st1bd_r_mte },
5135 { NULL,
5136 gen_helper_sve_st1hh_le_r_mte,
5137 gen_helper_sve_st1hs_le_r_mte,
5138 gen_helper_sve_st1hd_le_r_mte },
5139 { NULL, NULL,
5140 gen_helper_sve_st1ss_le_r_mte,
5141 gen_helper_sve_st1sd_le_r_mte },
5142 { NULL, NULL, NULL,
5143 gen_helper_sve_st1dd_le_r_mte } },
5144 { { gen_helper_sve_st1bb_r_mte,
5145 gen_helper_sve_st1bh_r_mte,
5146 gen_helper_sve_st1bs_r_mte,
5147 gen_helper_sve_st1bd_r_mte },
5148 { NULL,
5149 gen_helper_sve_st1hh_be_r_mte,
5150 gen_helper_sve_st1hs_be_r_mte,
5151 gen_helper_sve_st1hd_be_r_mte },
5152 { NULL, NULL,
5153 gen_helper_sve_st1ss_be_r_mte,
5154 gen_helper_sve_st1sd_be_r_mte },
5155 { NULL, NULL, NULL,
5156 gen_helper_sve_st1dd_be_r_mte } } },
1a039c7e 5157 };
71b9f394
RH
5158 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5159 { { { gen_helper_sve_st2bb_r,
5160 gen_helper_sve_st2hh_le_r,
5161 gen_helper_sve_st2ss_le_r,
5162 gen_helper_sve_st2dd_le_r },
5163 { gen_helper_sve_st3bb_r,
5164 gen_helper_sve_st3hh_le_r,
5165 gen_helper_sve_st3ss_le_r,
5166 gen_helper_sve_st3dd_le_r },
5167 { gen_helper_sve_st4bb_r,
5168 gen_helper_sve_st4hh_le_r,
5169 gen_helper_sve_st4ss_le_r,
5170 gen_helper_sve_st4dd_le_r } },
5171 { { gen_helper_sve_st2bb_r,
5172 gen_helper_sve_st2hh_be_r,
5173 gen_helper_sve_st2ss_be_r,
5174 gen_helper_sve_st2dd_be_r },
5175 { gen_helper_sve_st3bb_r,
5176 gen_helper_sve_st3hh_be_r,
5177 gen_helper_sve_st3ss_be_r,
5178 gen_helper_sve_st3dd_be_r },
5179 { gen_helper_sve_st4bb_r,
5180 gen_helper_sve_st4hh_be_r,
5181 gen_helper_sve_st4ss_be_r,
5182 gen_helper_sve_st4dd_be_r } } },
5183 { { { gen_helper_sve_st2bb_r_mte,
5184 gen_helper_sve_st2hh_le_r_mte,
5185 gen_helper_sve_st2ss_le_r_mte,
5186 gen_helper_sve_st2dd_le_r_mte },
5187 { gen_helper_sve_st3bb_r_mte,
5188 gen_helper_sve_st3hh_le_r_mte,
5189 gen_helper_sve_st3ss_le_r_mte,
5190 gen_helper_sve_st3dd_le_r_mte },
5191 { gen_helper_sve_st4bb_r_mte,
5192 gen_helper_sve_st4hh_le_r_mte,
5193 gen_helper_sve_st4ss_le_r_mte,
5194 gen_helper_sve_st4dd_le_r_mte } },
5195 { { gen_helper_sve_st2bb_r_mte,
5196 gen_helper_sve_st2hh_be_r_mte,
5197 gen_helper_sve_st2ss_be_r_mte,
5198 gen_helper_sve_st2dd_be_r_mte },
5199 { gen_helper_sve_st3bb_r_mte,
5200 gen_helper_sve_st3hh_be_r_mte,
5201 gen_helper_sve_st3ss_be_r_mte,
5202 gen_helper_sve_st3dd_be_r_mte },
5203 { gen_helper_sve_st4bb_r_mte,
5204 gen_helper_sve_st4hh_be_r_mte,
5205 gen_helper_sve_st4ss_be_r_mte,
5206 gen_helper_sve_st4dd_be_r_mte } } },
1a039c7e
RH
5207 };
5208 gen_helper_gvec_mem *fn;
28d57f2d 5209 int be = s->be_data == MO_BE;
1a039c7e
RH
5210
5211 if (nreg == 0) {
5212 /* ST1 */
71b9f394
RH
5213 fn = fn_single[s->mte_active[0]][be][msz][esz];
5214 nreg = 1;
1a039c7e
RH
5215 } else {
5216 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5217 assert(msz == esz);
71b9f394 5218 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
1a039c7e
RH
5219 }
5220 assert(fn != NULL);
71b9f394 5221 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
1a039c7e
RH
5222}
5223
3a7be554 5224static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5225{
5226 if (a->rm == 31 || a->msz > a->esz) {
5227 return false;
5228 }
5229 if (sve_access_check(s)) {
5230 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5231 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5232 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5233 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5234 }
5235 return true;
5236}
5237
3a7be554 5238static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5239{
5240 if (a->msz > a->esz) {
5241 return false;
5242 }
5243 if (sve_access_check(s)) {
5244 int vsz = vec_full_reg_size(s);
5245 int elements = vsz >> a->esz;
5246 TCGv_i64 addr = new_tmp_a64(s);
5247
5248 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5249 (a->imm * elements * (a->nreg + 1)) << a->msz);
5250 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5251 }
5252 return true;
5253}
f6dbf62a
RH
5254
5255/*
5256 *** SVE gather loads / scatter stores
5257 */
5258
500d0484 5259static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
d28d12f0 5260 int scale, TCGv_i64 scalar, int msz, bool is_write,
500d0484 5261 gen_helper_gvec_mem_scatter *fn)
f6dbf62a
RH
5262{
5263 unsigned vsz = vec_full_reg_size(s);
f6dbf62a
RH
5264 TCGv_ptr t_zm = tcg_temp_new_ptr();
5265 TCGv_ptr t_pg = tcg_temp_new_ptr();
5266 TCGv_ptr t_zt = tcg_temp_new_ptr();
500d0484 5267 TCGv_i32 t_desc;
d28d12f0 5268 int desc = 0;
500d0484 5269
d28d12f0
RH
5270 if (s->mte_active[0]) {
5271 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5272 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5273 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5274 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5275 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5276 desc <<= SVE_MTEDESC_SHIFT;
5277 }
ba080b86 5278 desc = simd_desc(vsz, vsz, scale);
500d0484 5279 t_desc = tcg_const_i32(desc);
f6dbf62a
RH
5280
5281 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5282 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5283 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
500d0484 5284 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
f6dbf62a
RH
5285
5286 tcg_temp_free_ptr(t_zt);
5287 tcg_temp_free_ptr(t_zm);
5288 tcg_temp_free_ptr(t_pg);
500d0484 5289 tcg_temp_free_i32(t_desc);
f6dbf62a
RH
5290}
5291
d28d12f0
RH
5292/* Indexed by [mte][be][ff][xs][u][msz]. */
5293static gen_helper_gvec_mem_scatter * const
5294gather_load_fn32[2][2][2][2][2][3] = {
5295 { /* MTE Inactive */
5296 { /* Little-endian */
5297 { { { gen_helper_sve_ldbss_zsu,
5298 gen_helper_sve_ldhss_le_zsu,
5299 NULL, },
5300 { gen_helper_sve_ldbsu_zsu,
5301 gen_helper_sve_ldhsu_le_zsu,
5302 gen_helper_sve_ldss_le_zsu, } },
5303 { { gen_helper_sve_ldbss_zss,
5304 gen_helper_sve_ldhss_le_zss,
5305 NULL, },
5306 { gen_helper_sve_ldbsu_zss,
5307 gen_helper_sve_ldhsu_le_zss,
5308 gen_helper_sve_ldss_le_zss, } } },
5309
5310 /* First-fault */
5311 { { { gen_helper_sve_ldffbss_zsu,
5312 gen_helper_sve_ldffhss_le_zsu,
5313 NULL, },
5314 { gen_helper_sve_ldffbsu_zsu,
5315 gen_helper_sve_ldffhsu_le_zsu,
5316 gen_helper_sve_ldffss_le_zsu, } },
5317 { { gen_helper_sve_ldffbss_zss,
5318 gen_helper_sve_ldffhss_le_zss,
5319 NULL, },
5320 { gen_helper_sve_ldffbsu_zss,
5321 gen_helper_sve_ldffhsu_le_zss,
5322 gen_helper_sve_ldffss_le_zss, } } } },
5323
5324 { /* Big-endian */
5325 { { { gen_helper_sve_ldbss_zsu,
5326 gen_helper_sve_ldhss_be_zsu,
5327 NULL, },
5328 { gen_helper_sve_ldbsu_zsu,
5329 gen_helper_sve_ldhsu_be_zsu,
5330 gen_helper_sve_ldss_be_zsu, } },
5331 { { gen_helper_sve_ldbss_zss,
5332 gen_helper_sve_ldhss_be_zss,
5333 NULL, },
5334 { gen_helper_sve_ldbsu_zss,
5335 gen_helper_sve_ldhsu_be_zss,
5336 gen_helper_sve_ldss_be_zss, } } },
5337
5338 /* First-fault */
5339 { { { gen_helper_sve_ldffbss_zsu,
5340 gen_helper_sve_ldffhss_be_zsu,
5341 NULL, },
5342 { gen_helper_sve_ldffbsu_zsu,
5343 gen_helper_sve_ldffhsu_be_zsu,
5344 gen_helper_sve_ldffss_be_zsu, } },
5345 { { gen_helper_sve_ldffbss_zss,
5346 gen_helper_sve_ldffhss_be_zss,
5347 NULL, },
5348 { gen_helper_sve_ldffbsu_zss,
5349 gen_helper_sve_ldffhsu_be_zss,
5350 gen_helper_sve_ldffss_be_zss, } } } } },
5351 { /* MTE Active */
5352 { /* Little-endian */
5353 { { { gen_helper_sve_ldbss_zsu_mte,
5354 gen_helper_sve_ldhss_le_zsu_mte,
5355 NULL, },
5356 { gen_helper_sve_ldbsu_zsu_mte,
5357 gen_helper_sve_ldhsu_le_zsu_mte,
5358 gen_helper_sve_ldss_le_zsu_mte, } },
5359 { { gen_helper_sve_ldbss_zss_mte,
5360 gen_helper_sve_ldhss_le_zss_mte,
5361 NULL, },
5362 { gen_helper_sve_ldbsu_zss_mte,
5363 gen_helper_sve_ldhsu_le_zss_mte,
5364 gen_helper_sve_ldss_le_zss_mte, } } },
5365
5366 /* First-fault */
5367 { { { gen_helper_sve_ldffbss_zsu_mte,
5368 gen_helper_sve_ldffhss_le_zsu_mte,
5369 NULL, },
5370 { gen_helper_sve_ldffbsu_zsu_mte,
5371 gen_helper_sve_ldffhsu_le_zsu_mte,
5372 gen_helper_sve_ldffss_le_zsu_mte, } },
5373 { { gen_helper_sve_ldffbss_zss_mte,
5374 gen_helper_sve_ldffhss_le_zss_mte,
5375 NULL, },
5376 { gen_helper_sve_ldffbsu_zss_mte,
5377 gen_helper_sve_ldffhsu_le_zss_mte,
5378 gen_helper_sve_ldffss_le_zss_mte, } } } },
5379
5380 { /* Big-endian */
5381 { { { gen_helper_sve_ldbss_zsu_mte,
5382 gen_helper_sve_ldhss_be_zsu_mte,
5383 NULL, },
5384 { gen_helper_sve_ldbsu_zsu_mte,
5385 gen_helper_sve_ldhsu_be_zsu_mte,
5386 gen_helper_sve_ldss_be_zsu_mte, } },
5387 { { gen_helper_sve_ldbss_zss_mte,
5388 gen_helper_sve_ldhss_be_zss_mte,
5389 NULL, },
5390 { gen_helper_sve_ldbsu_zss_mte,
5391 gen_helper_sve_ldhsu_be_zss_mte,
5392 gen_helper_sve_ldss_be_zss_mte, } } },
5393
5394 /* First-fault */
5395 { { { gen_helper_sve_ldffbss_zsu_mte,
5396 gen_helper_sve_ldffhss_be_zsu_mte,
5397 NULL, },
5398 { gen_helper_sve_ldffbsu_zsu_mte,
5399 gen_helper_sve_ldffhsu_be_zsu_mte,
5400 gen_helper_sve_ldffss_be_zsu_mte, } },
5401 { { gen_helper_sve_ldffbss_zss_mte,
5402 gen_helper_sve_ldffhss_be_zss_mte,
5403 NULL, },
5404 { gen_helper_sve_ldffbsu_zss_mte,
5405 gen_helper_sve_ldffhsu_be_zss_mte,
5406 gen_helper_sve_ldffss_be_zss_mte, } } } } },
673e9fa6
RH
5407};
5408
5409/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5410static gen_helper_gvec_mem_scatter * const
5411gather_load_fn64[2][2][2][3][2][4] = {
5412 { /* MTE Inactive */
5413 { /* Little-endian */
5414 { { { gen_helper_sve_ldbds_zsu,
5415 gen_helper_sve_ldhds_le_zsu,
5416 gen_helper_sve_ldsds_le_zsu,
5417 NULL, },
5418 { gen_helper_sve_ldbdu_zsu,
5419 gen_helper_sve_ldhdu_le_zsu,
5420 gen_helper_sve_ldsdu_le_zsu,
5421 gen_helper_sve_lddd_le_zsu, } },
5422 { { gen_helper_sve_ldbds_zss,
5423 gen_helper_sve_ldhds_le_zss,
5424 gen_helper_sve_ldsds_le_zss,
5425 NULL, },
5426 { gen_helper_sve_ldbdu_zss,
5427 gen_helper_sve_ldhdu_le_zss,
5428 gen_helper_sve_ldsdu_le_zss,
5429 gen_helper_sve_lddd_le_zss, } },
5430 { { gen_helper_sve_ldbds_zd,
5431 gen_helper_sve_ldhds_le_zd,
5432 gen_helper_sve_ldsds_le_zd,
5433 NULL, },
5434 { gen_helper_sve_ldbdu_zd,
5435 gen_helper_sve_ldhdu_le_zd,
5436 gen_helper_sve_ldsdu_le_zd,
5437 gen_helper_sve_lddd_le_zd, } } },
5438
5439 /* First-fault */
5440 { { { gen_helper_sve_ldffbds_zsu,
5441 gen_helper_sve_ldffhds_le_zsu,
5442 gen_helper_sve_ldffsds_le_zsu,
5443 NULL, },
5444 { gen_helper_sve_ldffbdu_zsu,
5445 gen_helper_sve_ldffhdu_le_zsu,
5446 gen_helper_sve_ldffsdu_le_zsu,
5447 gen_helper_sve_ldffdd_le_zsu, } },
5448 { { gen_helper_sve_ldffbds_zss,
5449 gen_helper_sve_ldffhds_le_zss,
5450 gen_helper_sve_ldffsds_le_zss,
5451 NULL, },
5452 { gen_helper_sve_ldffbdu_zss,
5453 gen_helper_sve_ldffhdu_le_zss,
5454 gen_helper_sve_ldffsdu_le_zss,
5455 gen_helper_sve_ldffdd_le_zss, } },
5456 { { gen_helper_sve_ldffbds_zd,
5457 gen_helper_sve_ldffhds_le_zd,
5458 gen_helper_sve_ldffsds_le_zd,
5459 NULL, },
5460 { gen_helper_sve_ldffbdu_zd,
5461 gen_helper_sve_ldffhdu_le_zd,
5462 gen_helper_sve_ldffsdu_le_zd,
5463 gen_helper_sve_ldffdd_le_zd, } } } },
5464 { /* Big-endian */
5465 { { { gen_helper_sve_ldbds_zsu,
5466 gen_helper_sve_ldhds_be_zsu,
5467 gen_helper_sve_ldsds_be_zsu,
5468 NULL, },
5469 { gen_helper_sve_ldbdu_zsu,
5470 gen_helper_sve_ldhdu_be_zsu,
5471 gen_helper_sve_ldsdu_be_zsu,
5472 gen_helper_sve_lddd_be_zsu, } },
5473 { { gen_helper_sve_ldbds_zss,
5474 gen_helper_sve_ldhds_be_zss,
5475 gen_helper_sve_ldsds_be_zss,
5476 NULL, },
5477 { gen_helper_sve_ldbdu_zss,
5478 gen_helper_sve_ldhdu_be_zss,
5479 gen_helper_sve_ldsdu_be_zss,
5480 gen_helper_sve_lddd_be_zss, } },
5481 { { gen_helper_sve_ldbds_zd,
5482 gen_helper_sve_ldhds_be_zd,
5483 gen_helper_sve_ldsds_be_zd,
5484 NULL, },
5485 { gen_helper_sve_ldbdu_zd,
5486 gen_helper_sve_ldhdu_be_zd,
5487 gen_helper_sve_ldsdu_be_zd,
5488 gen_helper_sve_lddd_be_zd, } } },
5489
5490 /* First-fault */
5491 { { { gen_helper_sve_ldffbds_zsu,
5492 gen_helper_sve_ldffhds_be_zsu,
5493 gen_helper_sve_ldffsds_be_zsu,
5494 NULL, },
5495 { gen_helper_sve_ldffbdu_zsu,
5496 gen_helper_sve_ldffhdu_be_zsu,
5497 gen_helper_sve_ldffsdu_be_zsu,
5498 gen_helper_sve_ldffdd_be_zsu, } },
5499 { { gen_helper_sve_ldffbds_zss,
5500 gen_helper_sve_ldffhds_be_zss,
5501 gen_helper_sve_ldffsds_be_zss,
5502 NULL, },
5503 { gen_helper_sve_ldffbdu_zss,
5504 gen_helper_sve_ldffhdu_be_zss,
5505 gen_helper_sve_ldffsdu_be_zss,
5506 gen_helper_sve_ldffdd_be_zss, } },
5507 { { gen_helper_sve_ldffbds_zd,
5508 gen_helper_sve_ldffhds_be_zd,
5509 gen_helper_sve_ldffsds_be_zd,
5510 NULL, },
5511 { gen_helper_sve_ldffbdu_zd,
5512 gen_helper_sve_ldffhdu_be_zd,
5513 gen_helper_sve_ldffsdu_be_zd,
5514 gen_helper_sve_ldffdd_be_zd, } } } } },
5515 { /* MTE Active */
5516 { /* Little-endian */
5517 { { { gen_helper_sve_ldbds_zsu_mte,
5518 gen_helper_sve_ldhds_le_zsu_mte,
5519 gen_helper_sve_ldsds_le_zsu_mte,
5520 NULL, },
5521 { gen_helper_sve_ldbdu_zsu_mte,
5522 gen_helper_sve_ldhdu_le_zsu_mte,
5523 gen_helper_sve_ldsdu_le_zsu_mte,
5524 gen_helper_sve_lddd_le_zsu_mte, } },
5525 { { gen_helper_sve_ldbds_zss_mte,
5526 gen_helper_sve_ldhds_le_zss_mte,
5527 gen_helper_sve_ldsds_le_zss_mte,
5528 NULL, },
5529 { gen_helper_sve_ldbdu_zss_mte,
5530 gen_helper_sve_ldhdu_le_zss_mte,
5531 gen_helper_sve_ldsdu_le_zss_mte,
5532 gen_helper_sve_lddd_le_zss_mte, } },
5533 { { gen_helper_sve_ldbds_zd_mte,
5534 gen_helper_sve_ldhds_le_zd_mte,
5535 gen_helper_sve_ldsds_le_zd_mte,
5536 NULL, },
5537 { gen_helper_sve_ldbdu_zd_mte,
5538 gen_helper_sve_ldhdu_le_zd_mte,
5539 gen_helper_sve_ldsdu_le_zd_mte,
5540 gen_helper_sve_lddd_le_zd_mte, } } },
5541
5542 /* First-fault */
5543 { { { gen_helper_sve_ldffbds_zsu_mte,
5544 gen_helper_sve_ldffhds_le_zsu_mte,
5545 gen_helper_sve_ldffsds_le_zsu_mte,
5546 NULL, },
5547 { gen_helper_sve_ldffbdu_zsu_mte,
5548 gen_helper_sve_ldffhdu_le_zsu_mte,
5549 gen_helper_sve_ldffsdu_le_zsu_mte,
5550 gen_helper_sve_ldffdd_le_zsu_mte, } },
5551 { { gen_helper_sve_ldffbds_zss_mte,
5552 gen_helper_sve_ldffhds_le_zss_mte,
5553 gen_helper_sve_ldffsds_le_zss_mte,
5554 NULL, },
5555 { gen_helper_sve_ldffbdu_zss_mte,
5556 gen_helper_sve_ldffhdu_le_zss_mte,
5557 gen_helper_sve_ldffsdu_le_zss_mte,
5558 gen_helper_sve_ldffdd_le_zss_mte, } },
5559 { { gen_helper_sve_ldffbds_zd_mte,
5560 gen_helper_sve_ldffhds_le_zd_mte,
5561 gen_helper_sve_ldffsds_le_zd_mte,
5562 NULL, },
5563 { gen_helper_sve_ldffbdu_zd_mte,
5564 gen_helper_sve_ldffhdu_le_zd_mte,
5565 gen_helper_sve_ldffsdu_le_zd_mte,
5566 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5567 { /* Big-endian */
5568 { { { gen_helper_sve_ldbds_zsu_mte,
5569 gen_helper_sve_ldhds_be_zsu_mte,
5570 gen_helper_sve_ldsds_be_zsu_mte,
5571 NULL, },
5572 { gen_helper_sve_ldbdu_zsu_mte,
5573 gen_helper_sve_ldhdu_be_zsu_mte,
5574 gen_helper_sve_ldsdu_be_zsu_mte,
5575 gen_helper_sve_lddd_be_zsu_mte, } },
5576 { { gen_helper_sve_ldbds_zss_mte,
5577 gen_helper_sve_ldhds_be_zss_mte,
5578 gen_helper_sve_ldsds_be_zss_mte,
5579 NULL, },
5580 { gen_helper_sve_ldbdu_zss_mte,
5581 gen_helper_sve_ldhdu_be_zss_mte,
5582 gen_helper_sve_ldsdu_be_zss_mte,
5583 gen_helper_sve_lddd_be_zss_mte, } },
5584 { { gen_helper_sve_ldbds_zd_mte,
5585 gen_helper_sve_ldhds_be_zd_mte,
5586 gen_helper_sve_ldsds_be_zd_mte,
5587 NULL, },
5588 { gen_helper_sve_ldbdu_zd_mte,
5589 gen_helper_sve_ldhdu_be_zd_mte,
5590 gen_helper_sve_ldsdu_be_zd_mte,
5591 gen_helper_sve_lddd_be_zd_mte, } } },
5592
5593 /* First-fault */
5594 { { { gen_helper_sve_ldffbds_zsu_mte,
5595 gen_helper_sve_ldffhds_be_zsu_mte,
5596 gen_helper_sve_ldffsds_be_zsu_mte,
5597 NULL, },
5598 { gen_helper_sve_ldffbdu_zsu_mte,
5599 gen_helper_sve_ldffhdu_be_zsu_mte,
5600 gen_helper_sve_ldffsdu_be_zsu_mte,
5601 gen_helper_sve_ldffdd_be_zsu_mte, } },
5602 { { gen_helper_sve_ldffbds_zss_mte,
5603 gen_helper_sve_ldffhds_be_zss_mte,
5604 gen_helper_sve_ldffsds_be_zss_mte,
5605 NULL, },
5606 { gen_helper_sve_ldffbdu_zss_mte,
5607 gen_helper_sve_ldffhdu_be_zss_mte,
5608 gen_helper_sve_ldffsdu_be_zss_mte,
5609 gen_helper_sve_ldffdd_be_zss_mte, } },
5610 { { gen_helper_sve_ldffbds_zd_mte,
5611 gen_helper_sve_ldffhds_be_zd_mte,
5612 gen_helper_sve_ldffsds_be_zd_mte,
5613 NULL, },
5614 { gen_helper_sve_ldffbdu_zd_mte,
5615 gen_helper_sve_ldffhdu_be_zd_mte,
5616 gen_helper_sve_ldffsdu_be_zd_mte,
5617 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
673e9fa6
RH
5618};
5619
3a7be554 5620static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5621{
5622 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5623 bool be = s->be_data == MO_BE;
5624 bool mte = s->mte_active[0];
673e9fa6
RH
5625
5626 if (!sve_access_check(s)) {
5627 return true;
5628 }
5629
5630 switch (a->esz) {
5631 case MO_32:
d28d12f0 5632 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5633 break;
5634 case MO_64:
d28d12f0 5635 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5636 break;
5637 }
5638 assert(fn != NULL);
5639
5640 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5641 cpu_reg_sp(s, a->rn), a->msz, false, fn);
673e9fa6
RH
5642 return true;
5643}
5644
3a7be554 5645static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
673e9fa6
RH
5646{
5647 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5648 bool be = s->be_data == MO_BE;
5649 bool mte = s->mte_active[0];
673e9fa6
RH
5650 TCGv_i64 imm;
5651
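    /*
     * Reject unallocated encodings: the memory element size may not be
     * larger than the vector element size, and a sign-extending load
     * with equal sizes would perform no extension at all.
     */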
5652 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5653 return false;
5654 }
5655 if (!sve_access_check(s)) {
5656 return true;
5657 }
5658
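    /*
     * The vector-plus-immediate form has no index extension or scaling,
     * so use the unsigned 32-bit offset slot (xs=0) for MO_32 and the
     * overloaded 64-bit offset slot (xs=2) for MO_64.
     */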
5659 switch (a->esz) {
5660 case MO_32:
d28d12f0 5661 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
673e9fa6
RH
5662 break;
5663 case MO_64:
d28d12f0 5664 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
673e9fa6
RH
5665 break;
5666 }
5667 assert(fn != NULL);
5668
5669 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5670 * by loading the immediate into the scalar parameter.
5671 */
5672 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5673 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
673e9fa6
RH
5674 tcg_temp_free_i64(imm);
5675 return true;
5676}
5677
d28d12f0
RH
5678/* Indexed by [mte][be][xs][msz]. */
5679static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5680 { /* MTE Inactive */
5681 { /* Little-endian */
5682 { gen_helper_sve_stbs_zsu,
5683 gen_helper_sve_sths_le_zsu,
5684 gen_helper_sve_stss_le_zsu, },
5685 { gen_helper_sve_stbs_zss,
5686 gen_helper_sve_sths_le_zss,
5687 gen_helper_sve_stss_le_zss, } },
5688 { /* Big-endian */
5689 { gen_helper_sve_stbs_zsu,
5690 gen_helper_sve_sths_be_zsu,
5691 gen_helper_sve_stss_be_zsu, },
5692 { gen_helper_sve_stbs_zss,
5693 gen_helper_sve_sths_be_zss,
5694 gen_helper_sve_stss_be_zss, } } },
5695 { /* MTE Active */
5696 { /* Little-endian */
5697 { gen_helper_sve_stbs_zsu_mte,
5698 gen_helper_sve_sths_le_zsu_mte,
5699 gen_helper_sve_stss_le_zsu_mte, },
5700 { gen_helper_sve_stbs_zss_mte,
5701 gen_helper_sve_sths_le_zss_mte,
5702 gen_helper_sve_stss_le_zss_mte, } },
5703 { /* Big-endian */
5704 { gen_helper_sve_stbs_zsu_mte,
5705 gen_helper_sve_sths_be_zsu_mte,
5706 gen_helper_sve_stss_be_zsu_mte, },
5707 { gen_helper_sve_stbs_zss_mte,
5708 gen_helper_sve_sths_be_zss_mte,
5709 gen_helper_sve_stss_be_zss_mte, } } },
408ecde9
RH
5710};
5711
5712/* Note that we overload xs=2 to indicate 64-bit offset. */
d28d12f0
RH
5713static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5714 { /* MTE Inactive */
5715 { /* Little-endian */
5716 { gen_helper_sve_stbd_zsu,
5717 gen_helper_sve_sthd_le_zsu,
5718 gen_helper_sve_stsd_le_zsu,
5719 gen_helper_sve_stdd_le_zsu, },
5720 { gen_helper_sve_stbd_zss,
5721 gen_helper_sve_sthd_le_zss,
5722 gen_helper_sve_stsd_le_zss,
5723 gen_helper_sve_stdd_le_zss, },
5724 { gen_helper_sve_stbd_zd,
5725 gen_helper_sve_sthd_le_zd,
5726 gen_helper_sve_stsd_le_zd,
5727 gen_helper_sve_stdd_le_zd, } },
5728 { /* Big-endian */
5729 { gen_helper_sve_stbd_zsu,
5730 gen_helper_sve_sthd_be_zsu,
5731 gen_helper_sve_stsd_be_zsu,
5732 gen_helper_sve_stdd_be_zsu, },
5733 { gen_helper_sve_stbd_zss,
5734 gen_helper_sve_sthd_be_zss,
5735 gen_helper_sve_stsd_be_zss,
5736 gen_helper_sve_stdd_be_zss, },
5737 { gen_helper_sve_stbd_zd,
5738 gen_helper_sve_sthd_be_zd,
5739 gen_helper_sve_stsd_be_zd,
5740 gen_helper_sve_stdd_be_zd, } } },
5741 { /* MTE Active */
5742 { /* Little-endian */
5743 { gen_helper_sve_stbd_zsu_mte,
5744 gen_helper_sve_sthd_le_zsu_mte,
5745 gen_helper_sve_stsd_le_zsu_mte,
5746 gen_helper_sve_stdd_le_zsu_mte, },
5747 { gen_helper_sve_stbd_zss_mte,
5748 gen_helper_sve_sthd_le_zss_mte,
5749 gen_helper_sve_stsd_le_zss_mte,
5750 gen_helper_sve_stdd_le_zss_mte, },
5751 { gen_helper_sve_stbd_zd_mte,
5752 gen_helper_sve_sthd_le_zd_mte,
5753 gen_helper_sve_stsd_le_zd_mte,
5754 gen_helper_sve_stdd_le_zd_mte, } },
5755 { /* Big-endian */
5756 { gen_helper_sve_stbd_zsu_mte,
5757 gen_helper_sve_sthd_be_zsu_mte,
5758 gen_helper_sve_stsd_be_zsu_mte,
5759 gen_helper_sve_stdd_be_zsu_mte, },
5760 { gen_helper_sve_stbd_zss_mte,
5761 gen_helper_sve_sthd_be_zss_mte,
5762 gen_helper_sve_stsd_be_zss_mte,
5763 gen_helper_sve_stdd_be_zss_mte, },
5764 { gen_helper_sve_stbd_zd_mte,
5765 gen_helper_sve_sthd_be_zd_mte,
5766 gen_helper_sve_stsd_be_zd_mte,
5767 gen_helper_sve_stdd_be_zd_mte, } } },
408ecde9
RH
5768};
5769
3a7be554 5770static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5771{
f6dbf62a 5772 gen_helper_gvec_mem_scatter *fn;
d28d12f0
RH
5773 bool be = s->be_data == MO_BE;
5774 bool mte = s->mte_active[0];
f6dbf62a
RH
5775
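    /*
     * Reject unallocated encodings: the memory element size may not be
     * larger than the vector element size, and byte stores have no
     * scaled-offset form.
     */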
5776 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5777 return false;
5778 }
5779 if (!sve_access_check(s)) {
5780 return true;
5781 }
5782 switch (a->esz) {
5783 case MO_32:
d28d12f0 5784 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
f6dbf62a
RH
5785 break;
5786 case MO_64:
d28d12f0 5787 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
f6dbf62a
RH
5788 break;
5789 default:
5790 g_assert_not_reached();
5791 }
5792 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
d28d12f0 5793 cpu_reg_sp(s, a->rn), a->msz, true, fn);
f6dbf62a
RH
5794 return true;
5795}
dec6cf6b 5796
3a7be554 5797static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5798{
5799 gen_helper_gvec_mem_scatter *fn = NULL;
d28d12f0
RH
5800 bool be = s->be_data == MO_BE;
5801 bool mte = s->mte_active[0];
408ecde9
RH
5802 TCGv_i64 imm;
5803
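    /*
     * Reject unallocated encodings where the memory element size is
     * larger than the vector element size.
     */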
5804 if (a->esz < a->msz) {
5805 return false;
5806 }
5807 if (!sve_access_check(s)) {
5808 return true;
5809 }
5810
5811 switch (a->esz) {
5812 case MO_32:
d28d12f0 5813 fn = scatter_store_fn32[mte][be][0][a->msz];
408ecde9
RH
5814 break;
5815 case MO_64:
d28d12f0 5816 fn = scatter_store_fn64[mte][be][2][a->msz];
408ecde9
RH
5817 break;
5818 }
5819 assert(fn != NULL);
5820
5821 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5822 * by loading the immediate into the scalar parameter.
5823 */
5824 imm = tcg_const_i64(a->imm << a->msz);
d28d12f0 5825 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
408ecde9
RH
5826 tcg_temp_free_i64(imm);
5827 return true;
5828}
5829
dec6cf6b
RH
5830/*
5831 * Prefetches
5832 */
5833
3a7be554 5834static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5835{
5836 /* Prefetch is a nop within QEMU. */
2f95a3b0 5837 (void)sve_access_check(s);
dec6cf6b
RH
5838 return true;
5839}
5840
3a7be554 5841static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5842{
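    /* The register-offset form with Rm == 31 is treated as unallocated. */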
5843 if (a->rm == 31) {
5844 return false;
5845 }
5846 /* Prefetch is a nop within QEMU. */
2f95a3b0 5847 (void)sve_access_check(s);
dec6cf6b
RH
5848 return true;
5849}
a2103582
RH
5850
5851/*
5852 * Move Prefix
5853 *
5854 * TODO: The implementation so far could handle predicated merging movprfx.
5855 * The helper functions as written take an extra source register to
5856 * use in the operation, but the result is only written when predication
5857 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5858 * to allow the final write back to the destination to be unconditional.
5859 * For predicated zeroing movprfx, we need to rearrange the helpers to
5860 * allow the final write back to zero the inactive elements.
5861 *
5862 * In the meantime, just emit the moves.
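 *
 * For example (illustrative only), a guest sequence such as
 *     movprfx z0, z1
 *     mul     z0.d, p0/m, z0.d, z2.d
 * is currently translated as a plain copy of z1 into z0, after which
 * the MUL is decoded and translated independently as an ordinary
 * predicated multiply.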
5863 */
5864
3a7be554 5865static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
5866{
5867 return do_mov_z(s, a->rd, a->rn);
5868}
5869
3a7be554 5870static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5871{
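    /* Merging form: active elements are copied from zn, while inactive
     * elements keep their previous zd values (implemented as a select).
     */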
5872 if (sve_access_check(s)) {
5873 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5874 }
5875 return true;
5876}
5877
3a7be554 5878static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5879{
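    /* Zeroing form: active elements are copied from zn and inactive
     * elements are zeroed.
     */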
5880 if (sve_access_check(s)) {
5881 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5882 }
5883 return true;
5884}