]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
decodetree: Add DisasContext argument to !function expanders
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg-op.h"
24#include "tcg-op-gvec.h"
028e2a7b 25#include "tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
/* Expander taking a scalar shift amount: (vece, dofs, aofs, shift, opsz, clsz). */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that also return NZCV-style flags in the i32 result. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line helpers for contiguous and scatter/gather memory ops. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
ccd841c3
RH
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
54/* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
451e4ffd 57static int tszimm_esz(DisasContext *s, int x)
ccd841c3
RH
58{
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61}
62
451e4ffd 63static int tszimm_shr(DisasContext *s, int x)
ccd841c3 64{
451e4ffd 65 return (16 << tszimm_esz(s, x)) - x;
ccd841c3
RH
66}
67
68/* See e.g. LSL (immediate, predicated). */
451e4ffd 69static int tszimm_shl(DisasContext *s, int x)
ccd841c3 70{
451e4ffd 71 return x - (8 << tszimm_esz(s, x));
ccd841c3
RH
72}
73
/* !function expander for decodetree: returns the field value plus one. */
static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}
78
/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    /* Sign-extend the low byte, then optionally shift left by 8. */
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}
84
/* As expand_imm_sh8s, but the low byte is treated as unsigned. */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
89
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    /* 0 -> LD1B, 5 -> LD1H, 10 -> LD1W, 15 -> LD1D (unsigned forms). */
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
98
38388f7e
RH
99/*
100 * Include the generated decoder.
101 */
102
103#include "decode-sve.inc.c"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
116
/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* s->sve_len is the vector length in bytes; one predicate bit per byte. */
    return s->sve_len >> 3;
}
122
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    return size <= 8 ? 8 : QEMU_ALIGN_UP(size, 16);
}
139
/* Predicate register size rounded up for use with the gvec expanders. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
144
39eea561
RH
145/* Invoke a vector expander on two Zregs. */
146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
147 int esz, int rd, int rn)
38388f7e 148{
39eea561
RH
149 if (sve_access_check(s)) {
150 unsigned vsz = vec_full_reg_size(s);
151 gvec_fn(esz, vec_full_reg_offset(s, rd),
152 vec_full_reg_offset(s, rn), vsz, vsz);
153 }
154 return true;
38388f7e
RH
155}
156
/* Invoke a vector expander on three Zregs.
 * Always returns true: if the SVE access check fails, the exception
 * has already been raised and the insn is still "handled".
 */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}
169
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}
175
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
182
/* Invoke a vector expander on two Pregs.
 * Note the rounded-up predicate size (pred_gvec_reg_size) is used,
 * relying on the zero bits above pred_full_reg_size.
 */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
194
/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}
207
/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}
222
/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
228
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs N into the full value, Z into bit 1, C into bit 0;
 * V is always zero.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
237
238/* Subroutines computing the ARM PredTest psuedofunction. */
239static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240{
241 TCGv_i32 t = tcg_temp_new_i32();
242
243 gen_helper_sve_predtest1(t, d, g);
244 do_pred_flags(t);
245 tcg_temp_free_i32(t);
246}
247
/* PredTest over a full predicate register of WORDS 64-bit words,
 * located at byte offsets DOFS (value) and GOFS (guard) in CPUARMState.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count in and the flags result out. */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
265
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
271
/*
 *** SVE Logical - Unpredicated Group
 */

/* Bitwise ops are element-size agnostic; all use esz 0. */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
d1822297 295
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* Signed/unsigned saturating add/subtract. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
329
f97cfd59
RH
330/*
331 *** SVE Integer Arithmetic - Binary Predicated Group
332 */
333
334static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
335{
336 unsigned vsz = vec_full_reg_size(s);
337 if (fn == NULL) {
338 return false;
339 }
340 if (sve_access_check(s)) {
341 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
342 vec_full_reg_offset(s, a->rn),
343 vec_full_reg_offset(s, a->rm),
344 pred_full_reg_offset(s, a->pg),
345 vsz, vsz, 0, fn);
346 }
347 return true;
348}
349
/* Select active elememnts from Zn and inactive elements from Zm,
 * storing the result in Zd.
 * Note: no sve_access_check here; callers have already done it.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
366
/* Expand trans_<NAME>_zpzz with a per-element-size helper table. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
399
/* SDIV/UDIV exist only for 32- and 64-bit elements; NULL entries
 * make do_zpzz_ool reject the byte/halfword encodings.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
415
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
afac6d04
RH
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

/* Expand a predicated one-Zreg operation: Zd = fn(Zn, Pg).
 * NULL fn marks an unsupported element size (unallocated encoding).
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
444
/* Expand trans_<NAME> with a per-element-size helper table. */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
462
/* FABS/FNEG: no byte-sized floating-point elements, hence NULL at esz 0. */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Sign/zero extension: source width must be narrower than element size,
 * so the table has NULL entries for the impossible combinations.
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
538
047cec97
RH
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce Zn under predicate Pg into a scalar written to Vd.
 * NULL fn marks an unsupported element size.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* write_fp_dreg zero-extends the result into the full vector reg. */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
575
/* Expand a reduction trans_<NAME> with a per-element-size helper table. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_reduc * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    return do_vpz_ool(s, a, fns[a->esz]);                               \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no doubleword form (the accumulator is already 64-bit). */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
606
ccd841c3
RH
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}
629
68459864
RH
/* Copy Zn into Zd, storing zeros into inactive elements.
 * No sve_access_check here; callers have already done it.
 */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
643
ccd841c3
RH
644static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
645 gen_helper_gvec_3 *fn)
646{
647 if (sve_access_check(s)) {
648 unsigned vsz = vec_full_reg_size(s);
649 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
650 vec_full_reg_offset(s, a->rn),
651 pred_full_reg_offset(s, a->pg),
652 vsz, vsz, a->imm, fn);
653 }
654 return true;
655}
656
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
726
fe7f8dfb
RH
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/* Shift by wide (doubleword) elements: valid only for b/h/s element
 * sizes, hence the 3-entry table and the esz >= 3 rejection.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
749
d9d78dcc
RH
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/* Expand an unpredicated shift-by-immediate with a gvec expander.
 * ASR clamps an over-wide shift to esize-1; LSR/LSL zero the register.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
780
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
795
796static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
797{
798 if (fn == NULL) {
799 return false;
800 }
801 if (sve_access_check(s)) {
802 unsigned vsz = vec_full_reg_size(s);
803 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
804 vec_full_reg_offset(s, a->rn),
805 vec_full_reg_offset(s, a->rm),
806 vsz, vsz, 0, fn);
807 }
808 return true;
809}
810
/* Shift by wide elements, unpredicated; no doubleword form (NULL). */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
826
96a36e4a
RH
/*
 *** SVE Integer Multiply-Add Group
 */

/* Expand a predicated multiply-accumulate: Zd = fn(Za, Zn, Zm, Pg). */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
845
/* Expand trans_<NAME> (MLA/MLS) with a per-element-size helper table. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_gvec_5 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpzzz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
860
9a56c9c3
RH
/*
 *** SVE Index Generation Group
 */

/* Fill Zd with start, start+incr, start+2*incr, ...
 * The doubleword helper takes the i64 values directly; narrower
 * element sizes truncate them to i32 first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
895
/* INDEX (immediate start, immediate increment). */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).
 * Note cpu_reg values are not owned and must not be freed.
 */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment). */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (register start, register increment). */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
939
96f922cc
RH
/*
 *** SVE Stack Allocation Group
 */

/* ADDVL: Xd = Xn + imm * VL (vector length in bytes). */
static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/* ADDPL: Xd = Xn + imm * PL (predicate length in bytes). */
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

/* RDVL: Xd = imm * VL. */
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}
972
4b242d9c
RH
973/*
974 *** SVE Compute Vector Address Group
975 */
976
977static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
978{
979 if (sve_access_check(s)) {
980 unsigned vsz = vec_full_reg_size(s);
981 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
982 vec_full_reg_offset(s, a->rn),
983 vec_full_reg_offset(s, a->rm),
984 vsz, vsz, a->imm, fn);
985 }
986 return true;
987}
988
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1008
0762cd42
RH
/*
 *** SVE Integer Misc - Unpredicated Group
 */

/* FEXPA: no byte-sized form, so esz 0 is an unallocated encoding. */
static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
1032
/* FTSSEL: no byte-sized form, so esz 0 is an unallocated encoding. */
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
1053
516e246a
RH
/*
 *** SVE Predicate Logical Operations Group
 */

/* Expand a setting-flags predicate logical op: perform the operation,
 * then run PredTest on the result against the governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1108
/* pd = (pn & pm) & pg, one 64-bit predicate chunk. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_and_pg_i64, but on host vector types. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ANDS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        /* AND pn, pn is MOV; (pn & pn) & pg reduces to a 3-op AND. */
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        /* The guard duplicates an operand; one AND suffices. */
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1144
/* pd = (pn & ~pm) & pg, one 64-bit predicate chunk. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_bic_pg_i64, but on host vector types. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* BICS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        /* (pn & ~pm) & pn == pn & ~pm; one ANDC suffices. */
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1174
/* pd = (pn ^ pm) & pg, one 64-bit predicate chunk. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_eor_pg_i64, but on host vector types. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* EORS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1202
/* pd = (pn & pg) | (pm & ~pg): select pn where pg, else pm.
 * Note this clobbers pn and pm as scratch.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

/* As gen_sel_pg_i64, but on host vector types. */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* SEL has no setflags form; that encoding is unallocated. */
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1232
/* pd = (pn | pm) & pg, one 64-bit predicate chunk. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_orr_pg_i64, but on host vector types. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ORRS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        /* (pn | pn) & pn == pn: a plain predicate move. */
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1262
/* pd = (pn | ~pm) & pg, one 64-bit predicate chunk. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_orn_pg_i64, but on host vector types. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ORNS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1290
/* pd = pg & ~(pn | pm), one 64-bit predicate chunk. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* As gen_nor_pg_i64, but on host vector types. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* NORS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1318
/* pd = pg & ~(pn & pm), one 64-bit predicate chunk. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* As gen_nand_pg_i64, but on host vector types. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* NANDS: operation plus NZCV. */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1346
9e18d7a6
RH
1347/*
1348 *** SVE Predicate Misc Group
1349 */
1350
/*
 * PTEST: set NZCV from Pn under governing predicate Pg.
 * A single 64-bit word is handled inline; larger predicates go
 * through do_predtest, which iterates over the words.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1374
028e2a7b
RH
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * Given the vector size in bytes (@fullsz), a predicate-constraint
 * @pattern, and the element size shift (@esz), return the number of
 * active elements, or 0 if the pattern's bound cannot be satisfied
 * (or the pattern is one of the reserved #uimm5 encodings).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: largest power of two not exceeding the element count. */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the bound is the pattern value itself. */
        bound = pattern;
        return elements >= bound ? bound : 0;
    }
    if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        bound = 16u << (pattern - 0x9);
        return elements >= bound ? bound : 0;
    }
    switch (pattern) {
    case 0x1d: /* MUL4: round down to a multiple of 4. */
        return elements - elements % 4;
    case 0x1e: /* MUL3: round down to a multiple of 3. */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5: reserved, no elements. */
        return 0;
    }
}
1412
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        /* pred_esz_masks[esz] has one bit set per active element. */
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* The final word is only partially set. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents: try a vector splat of the repeated word. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Otherwise, store word by word: the repeated words, then the
     * partial last word, then zeros out to the full predicate size.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES: N = !empty, Z = empty, C = !empty (first set), V = 0. */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1492
/* PTRUE / PTRUES: fill Pd per the pattern, optionally setting flags. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}
1509
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND:
     * Pd = FFR AND FFR under Pg, which copies FFR and can set flags.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): plain copy out of the FFR predicate. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: plain copy into the FFR predicate. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1531
/*
 * Common expansion for PFIRST and PNEXT: call the out-of-line helper
 * on Pd (in place) with governing predicate Pn, then copy the flags
 * result it returns into NZCV via do_pred_flags.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    /* Predicate sizes can't use simd_desc: encode words + esz directly. */
    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* Note t serves as both the descriptor input and the flags output. */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1570
24e82e68
RH
1571/*
1572 *** SVE Element Count Group
1573 */
1574
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only underflow. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value can only overflow. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp to the bound if the 64-bit result went past it. */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1604
/* Similarly with 64-bit values; overflow must now be detected from
 * the sign bits since there is no wider type to compute in.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: select 0 when reg < val would underflow. */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: select UINT64_MAX when the sum wrapped. */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction:
             * overflow iff operands differ in sign and the result's
             * sign differs from the minuend's.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition:
             * overflow iff operands agree in sign and the result's
             * sign differs from them.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1651
/* Similarly with a vector and a scalar operand: expand via the
 * per-element-size saturating-add helpers.  For subtraction the scalar
 * is negated first (except unsigned 64-bit, which has its own helper).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* Byte/halfword helpers take the scalar as i32. */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* Word helpers take the scalar as i64. */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit subtract can't negate the scalar
                 * (negation of the full range doesn't fit), so it has
                 * a dedicated helper.
                 */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1736
/* CNT[BHWD]: write the (compile-time constant) element count * imm. */
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

/* INC/DEC[BHWD] (scalar): add or subtract element count * imm. */
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        /* a->d selects decrement. */
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}
1759
/* SQINC/UQINC/SQDEC/UQDEC (32-bit scalar): saturating inc/dec of the
 * low 32 bits of Xd, sign- or zero-extended into the full register.
 */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        /* Nothing to add; still perform the 32->64 extension. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* As above, for the full 64-bit register. */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
1804
/* INC/DEC[HWD] (vector): add or subtract element count * imm to each
 * element.  No byte-element form exists, so esz == 0 is unallocated.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Adding zero: just a vector move (do_mov_z checks access). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/* SQINC/UQINC/SQDEC/UQDEC (vector): saturating variant of the above. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1850
e1fa1164
RH
1851/*
1852 *** SVE Bitwise Immediate Group
1853 */
1854
/*
 * Expand a vector op with a bitmask immediate: decode the 13-bit
 * "dbm" field with the shared AArch64 logical-immediate decoder and
 * apply @gvec_fn at 64-bit element size.  Returns false (unallocated)
 * if the immediate does not decode.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

/* DUPM: broadcast a decoded bitmask immediate to the whole vector. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
1899
f25a2361
RH
1900/*
1901 *** SVE Integer Wide Immediate - Predicated Group
1902 */
1903
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar): copy @val into the active
 * elements of Zd, preserving inactive elements from Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
1932
/* FCPY: merging copy of an expanded FP immediate.  There is no
 * byte-element FP format, so esz == 0 is unallocated.
 */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/* CPY (immediate), merging. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /* NOTE(review): insn bit 13 appears to be the immediate shift (sh)
     * bit; a shifted immediate with byte elements is rejected as
     * unallocated -- confirm against the SVE CPY encoding.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/* CPY (immediate), zeroing: inactive elements are cleared. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Same bit-13 restriction as trans_CPY_m_i above. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1981
b94f8f60
RH
1982/*
1983 *** SVE Permute Extract Group
1984 */
1985
/* EXT: concatenate Zn:Zm and extract a vector starting at byte imm.
 * An immediate past the end of the vector selects offset 0.
 */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn, then the head of Zm after it. */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper; n_ofs rides in data. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2015
30562ab7
RH
2016/*
2017 *** SVE Permute - Unpredicated Group
2018 */
2019
/* DUP (scalar): broadcast a general register to all elements. */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* DUP (indexed): broadcast element [index] of Zn.  The imm field
 * encodes both the element size (trailing zeros) and the index.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        /* No size bit set in the low field: unallocated. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /* Index out of range for this vector length: result is 0. */
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2052
/* INSR: shift Zn up one element and insert @val at element 0,
 * via the per-element-size out-of-line helper.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert general register Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2093
/* REV (vector): reverse the order of elements, per element size. */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/* TBL: table lookup, selecting elements of Zn by the indices in Zm. */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/* SUNPKLO/HI, UUNPKLO/HI: widen the low or high half of Zn.
 * esz here is the *destination* size, so esz == 0 is unallocated;
 * a->u selects unsigned widening, a->h selects the high half.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2148
d731d8cb
RH
2149/*
2150 *** SVE Permute - Predicates Group
2151 */
2152
/*
 * Common expansion for 3-operand predicate permutes (ZIP/UZP/TRN);
 * @high_odd selects the high-half (ZIP2/TRN2) or odd-element (UZP2)
 * variant, and is passed to the helper in the descriptor.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2189
/*
 * Common expansion for 2-operand predicate permutes (REV_p, PUNPK);
 * @high_odd selects the high-half variant, as for do_perm_pred3.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2223
/* ZIP1/ZIP2 (predicates): interleave elements from the low (ZIP1) or
 * high (ZIP2) halves of Pn and Pm.
 */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2 (predicates): concatenate the even (UZP1) or odd (UZP2)
 * numbered elements of Pn:Pm.
 */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2 (predicates): interleave even (TRN1) or odd (TRN2)
 * numbered element pairs from Pn and Pm.
 */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse the order of elements within Pn. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack and widen the low/high half of Pn. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2268
234b48e9
RH
2269/*
2270 *** SVE Permute - Interleaving Group
2271 */
2272
2273static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2274{
2275 static gen_helper_gvec_3 * const fns[4] = {
2276 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2277 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278 };
2279
2280 if (sve_access_check(s)) {
2281 unsigned vsz = vec_full_reg_size(s);
2282 unsigned high_ofs = high ? vsz / 2 : 0;
2283 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284 vec_full_reg_offset(s, a->rn) + high_ofs,
2285 vec_full_reg_offset(s, a->rm) + high_ofs,
2286 vsz, vsz, 0, fns[a->esz]);
2287 }
2288 return true;
2289}
2290
2291static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2292 gen_helper_gvec_3 *fn)
2293{
2294 if (sve_access_check(s)) {
2295 unsigned vsz = vec_full_reg_size(s);
2296 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297 vec_full_reg_offset(s, a->rn),
2298 vec_full_reg_offset(s, a->rm),
2299 vsz, vsz, data, fn);
2300 }
2301 return true;
2302}
2303
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

/* Per-element-size helpers for UZP1/UZP2 on Zregs. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

/* UZP1 starts with even elements: odd_ofs data = 0. */
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

/* UZP2 starts with odd elements: data = element size in bytes. */
static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

/* Per-element-size helpers for TRN1/TRN2 on Zregs. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

/* TRN2 starts with odd elements: data = element size in bytes. */
static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2343
3ca879ae
RH
2344/*
2345 *** SVE Permute Vector - Predicated Group
2346 */
2347
/* COMPACT: copy active elements of Zn to consecutive low elements of Zd,
 * zeroing the rest.  Only word and doubleword element sizes exist.
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2355
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    /* Hand-rolled descriptor: predicate size minus 2, esz in data. */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2381
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two size: wrap with a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap via conditional move: last >= vsz ? 0 : last. */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2400
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the -(1 << esz) "not found" value to vsz - (1 << esz),
         * i.e. the last element. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* last < 0 ? vsz - (1 << esz) : last */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2416
/* Load an unsigned element of ESZ (log2 bytes) from BASE+OFS.
 * Returns a new i64 temp owned by the caller.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2440
/* Load an unsigned element of ESZ from RM[LAST].
 * Note: LAST is clobbered (extended into a pointer, and possibly
 * xor-adjusted for big-endian hosts).  Returns a new i64 temp.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2466
/* Compute CLAST for a Zreg: broadcast the element at (or after) the
 * last active element of Zm into all lanes of Zd.  If no element is
 * active, Zd instead receives a copy of Zn (the MOVPRFX source).
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: must survive the brcond below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2514
/* CLASTA (vectors): conditionally broadcast the element after last. */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

/* CLASTB (vectors): conditionally broadcast the last active element. */
static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2524
/* Compute CLAST for a scalar: place in REG_VAL the element of Zm at
 * (or after) the last active element of PG; if none is active, REG_VAL
 * is left unchanged.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* reg_val = (original last >= 0) ? loaded element : reg_val */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2557
2558/* Compute CLAST for a Vreg. */
2559static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2560{
2561 if (sve_access_check(s)) {
2562 int esz = a->esz;
2563 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2564 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567 write_fp_dreg(s, a->rd, reg);
2568 tcg_temp_free_i64(reg);
2569 }
2570 return true;
2571}
2572
/* CLASTA (SIMD&FP scalar): element after the last active. */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

/* CLASTB (SIMD&FP scalar): the last active element. */
static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2582
/* Compute CLAST for a Xreg: general-register form.  The destination
 * register is first zero-extended to the element size so that the
 * "no active element" case keeps only the low esz bits of Xd.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Full 64-bit element: no extension needed. */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2612
/* CLASTA (general register): element after the last active. */
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

/* CLASTB (general register): the last active element. */
static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2622
/* Compute LAST for a scalar: return (as a new i64 temp) the element of
 * Zm at the last active position, wrapping around the vector when no
 * element is active (LASTB) or selecting the next position (LASTA).
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2641
2642/* Compute LAST for a Vreg. */
2643static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2644{
2645 if (sve_access_check(s)) {
2646 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647 write_fp_dreg(s, a->rd, val);
2648 tcg_temp_free_i64(val);
2649 }
2650 return true;
2651}
2652
/* LASTA (SIMD&FP scalar): element after the last active. */
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

/* LASTB (SIMD&FP scalar): the last active element. */
static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}
2662
2663/* Compute LAST for a Xreg. */
2664static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2665{
2666 if (sve_access_check(s)) {
2667 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669 tcg_temp_free_i64(val);
2670 }
2671 return true;
2672}
2673
/* LASTA (general register): element after the last active. */
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

/* LASTB (general register): the last active element. */
static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
2683
/* CPY (scalar, merging): copy Xn|SP to active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}
2691
3a7be554 2692static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
792a5578
RH
2693{
2694 if (sve_access_check(s)) {
2695 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2696 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698 tcg_temp_free_i64(t);
2699 }
2700 return true;
2701}
2702
/* REVB: reverse bytes within each element (h/s/d only). */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVH: reverse halfwords within each element (s/d only). */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVW: reverse words within each doubleword element. */
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

/* RBIT: reverse bits within each element. */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2740
3a7be554 2741static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
b48ff240
RH
2742{
2743 if (sve_access_check(s)) {
2744 unsigned vsz = vec_full_reg_size(s);
2745 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746 vec_full_reg_offset(s, a->rn),
2747 vec_full_reg_offset(s, a->rm),
2748 pred_full_reg_offset(s, a->pg),
2749 vsz, vsz, a->esz, gen_helper_sve_splice);
2750 }
2751 return true;
2752}
2753
757f9cff
RH
2754/*
2755 *** SVE Integer Compare - Vectors Group
2756 */
2757
/*
 * Expand a predicated vector-vector compare producing a predicate and
 * setting NZCV.  Returns false (unallocated) for a NULL gen_fn.
 * Note that T serves double duty: descriptor on input to the helper,
 * flags result (pred_flags format) on output.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2796
/* Emit trans_<NAME>_ppzz: same-width vector-vector compares for all
 * four element sizes.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* Emit trans_<NAME>_ppzw: compare against a wide (doubleword) second
 * operand; no doubleword element form exists (fns[3] == NULL).
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
{ \
    static gen_helper_gvec_flags_4 * const fns[4] = { \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL \
    }; \
    return do_ppzz_flags(s, a, fns[a->esz]); \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2838
38cadeba
RH
2839/*
2840 *** SVE Integer Compare - Immediate Groups
2841 */
2842
/*
 * Expand a predicated vector-immediate compare producing a predicate
 * and setting NZCV.  The immediate is passed in the descriptor data.
 * Returns false (unallocated) for a NULL gen_fn.  As in do_ppzz_flags,
 * T is descriptor on input and flags result on output.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2878
/* Emit trans_<NAME>_ppzi: vector-immediate compares for all four
 * element sizes.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
{ \
    static gen_helper_gvec_flags_3 * const fns[4] = { \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    }; \
    return do_ppzi_flags(s, a, fns[a->esz]); \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2901
35da316f
RH
2902/*
2903 *** SVE Partition Break Group
2904 */
2905
/*
 * Expand a 3-predicate break operation (BRKPA/BRKPB).  When a->s is
 * set, the flag-setting helper runs and NZCV is updated; T is then
 * descriptor-in/flags-out as in do_ppzz_flags.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2940
/*
 * Expand a 2-predicate break operation (BRKA/BRKB/BRKN).  As in
 * do_brk3, a->s selects the flag-setting helper variant.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2972
/* BRKPA: break after the first active element of Pn, propagating. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

/* BRKPB: break before the first active element of Pn, propagating. */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA, merging form. */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

/* BRKB, merging form. */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

/* BRKA, zeroing form. */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

/* BRKB, zeroing form. */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate the break condition to the next predicate. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3007
9ee3a611
RH
/*
 *** SVE Predicate Count Group
 */

/* Set VAL to the number of active elements of size ESZ in Pn & Pg.
 * For predicates of at most 8 bytes this is done inline with a
 * masked popcount; larger predicates go through a helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        /* Hand-rolled descriptor, as in find_last_active. */
        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3053
3a7be554 3054static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
9ee3a611
RH
3055{
3056 if (sve_access_check(s)) {
3057 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058 }
3059 return true;
3060}
3061
3a7be554 3062static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
9ee3a611
RH
3063{
3064 if (sve_access_check(s)) {
3065 TCGv_i64 reg = cpu_reg(s, a->rd);
3066 TCGv_i64 val = tcg_temp_new_i64();
3067
3068 do_cntp(s, val, a->esz, a->pg, a->pg);
3069 if (a->d) {
3070 tcg_gen_sub_i64(reg, reg, val);
3071 } else {
3072 tcg_gen_add_i64(reg, reg, val);
3073 }
3074 tcg_temp_free_i64(val);
3075 }
3076 return true;
3077}
3078
3a7be554 3079static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
9ee3a611
RH
3080{
3081 if (a->esz == 0) {
3082 return false;
3083 }
3084 if (sve_access_check(s)) {
3085 unsigned vsz = vec_full_reg_size(s);
3086 TCGv_i64 val = tcg_temp_new_i64();
3087 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089 do_cntp(s, val, a->esz, a->pg, a->pg);
3090 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092 }
3093 return true;
3094}
3095
/* SQINCP/UQINCP/SQDECP/UQDECP (scalar, 32-bit): saturating add or
 * subtract of the active-element count, with 32-bit saturation.
 */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

/* As above, with 64-bit saturation. */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

/* SQINCP/UQINCP/SQDECP/UQDECP (vector): saturating per-element add or
 * subtract of the count.  Byte elements are unallocated.
 */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3132
caf1cefc
RH
3133/*
3134 *** SVE Integer Compare Scalars Group
3135 */
3136
/* CTERMEQ/CTERMNE: compare Xn and Xm, setting N = !C = (comparison
 * holds), and V = !N & !C, per the SVE loop-termination convention.
 * CF is assumed to already hold the state from a preceding WHILE/PTEST.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3161
/* WHILELT/WHILELE/WHILELO/WHILELS: construct a predicate whose first K
 * elements are true, where K is the number of loop iterations for which
 * the scalar comparison Xn <cond> Xm holds, bounded by the vector length.
 * The actual predicate construction and flag computation are done by
 * the helper; here we only compute K.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit form: extend both operands per signedness. */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false. */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Hand-rolled descriptor: predicate size minus 2, esz in data. */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3246
ed491961
RH
3247/*
3248 *** SVE Integer Wide Immediate - Unpredicated Group
3249 */
3250
/* FDUP: broadcast an 8-bit VFP-encoded FP immediate to all elements.
 * Byte elements are unallocated.
 */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        imm = dup_const(a->esz, imm);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
    }
    return true;
}
3269
/* DUP (immediate): broadcast an integer immediate to all elements.
 * The byte-element form with a shifted immediate (insn bit 13 set)
 * is unallocated.
 */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
    }
    return true;
}
3283
3a7be554 3284static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3285{
3a7be554 3286 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3287 return false;
3288 }
3289 if (sve_access_check(s)) {
3290 unsigned vsz = vec_full_reg_size(s);
3291 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293 }
3294 return true;
3295}
3296
/* SUB (vector, immediate): implemented as ADD of the negated immediate.
 * Note a->imm is negated in place; the decodetree arg struct is
 * per-insn so this does not leak outside this translation.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3302
/* SUBR (vector, immediate): reversed subtract, Zd = imm - Zn.
 * Expanded with a scalar-first GVecGen2s table so the immediate is the
 * left-hand operand.  The byte-element form with a shifted immediate
 * (insn bit 13 set) is unallocated.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3346
3a7be554 3347static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d
RH
3348{
3349 if (sve_access_check(s)) {
3350 unsigned vsz = vec_full_reg_size(s);
3351 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3352 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3353 }
3354 return true;
3355}
3356
3a7be554 3357static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
6e6a157d 3358{
3a7be554 3359 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
6e6a157d
RH
3360 return false;
3361 }
3362 if (sve_access_check(s)) {
3363 TCGv_i64 val = tcg_const_i64(a->imm);
3364 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3365 tcg_temp_free_i64(val);
3366 }
3367 return true;
3368}
3369
3a7be554 3370static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3371{
3a7be554 3372 return do_zzi_sat(s, a, false, false);
6e6a157d
RH
3373}
3374
3a7be554 3375static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3376{
3a7be554 3377 return do_zzi_sat(s, a, true, false);
6e6a157d
RH
3378}
3379
3a7be554 3380static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3381{
3a7be554 3382 return do_zzi_sat(s, a, false, true);
6e6a157d
RH
3383}
3384
3a7be554 3385static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
6e6a157d 3386{
3a7be554 3387 return do_zzi_sat(s, a, true, true);
6e6a157d
RH
3388}
3389
3390static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3391{
3392 if (sve_access_check(s)) {
3393 unsigned vsz = vec_full_reg_size(s);
3394 TCGv_i64 c = tcg_const_i64(a->imm);
3395
3396 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3397 vec_full_reg_offset(s, a->rn),
3398 c, vsz, vsz, 0, fn);
3399 tcg_temp_free_i64(c);
3400 }
3401 return true;
3402}
3403
3404#define DO_ZZI(NAME, name) \
3a7be554 3405static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
6e6a157d
RH
3406{ \
3407 static gen_helper_gvec_2i * const fns[4] = { \
3408 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3409 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3410 }; \
3411 return do_zzi_ool(s, a, fns[a->esz]); \
3412}
3413
3414DO_ZZI(SMAX, smax)
3415DO_ZZI(UMAX, umax)
3416DO_ZZI(SMIN, smin)
3417DO_ZZI(UMIN, umin)
3418
3419#undef DO_ZZI
3420
3a7be554 3421static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
d730ecaa
RH
3422{
3423 static gen_helper_gvec_3 * const fns[2][2] = {
3424 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3425 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3426 };
3427
3428 if (sve_access_check(s)) {
3429 unsigned vsz = vec_full_reg_size(s);
3430 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3431 vec_full_reg_offset(s, a->rn),
3432 vec_full_reg_offset(s, a->rm),
3433 vsz, vsz, 0, fns[a->u][a->sz]);
3434 }
3435 return true;
3436}
3437
3a7be554 3438static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
16fcfdc7
RH
3439{
3440 static gen_helper_gvec_3 * const fns[2][2] = {
3441 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3442 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3443 };
3444
3445 if (sve_access_check(s)) {
3446 unsigned vsz = vec_full_reg_size(s);
3447 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3448 vec_full_reg_offset(s, a->rn),
3449 vec_full_reg_offset(s, a->rm),
3450 vsz, vsz, a->index, fns[a->u][a->sz]);
3451 }
3452 return true;
3453}
3454
3455
ca40a6e6
RH
3456/*
3457 *** SVE Floating Point Multiply-Add Indexed Group
3458 */
3459
3a7be554 3460static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
ca40a6e6
RH
3461{
3462 static gen_helper_gvec_4_ptr * const fns[3] = {
3463 gen_helper_gvec_fmla_idx_h,
3464 gen_helper_gvec_fmla_idx_s,
3465 gen_helper_gvec_fmla_idx_d,
3466 };
3467
3468 if (sve_access_check(s)) {
3469 unsigned vsz = vec_full_reg_size(s);
3470 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3471 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3472 vec_full_reg_offset(s, a->rn),
3473 vec_full_reg_offset(s, a->rm),
3474 vec_full_reg_offset(s, a->ra),
3475 status, vsz, vsz, (a->index << 1) | a->sub,
3476 fns[a->esz - 1]);
3477 tcg_temp_free_ptr(status);
3478 }
3479 return true;
3480}
3481
3482/*
3483 *** SVE Floating Point Multiply Indexed Group
3484 */
3485
3a7be554 3486static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
ca40a6e6
RH
3487{
3488 static gen_helper_gvec_3_ptr * const fns[3] = {
3489 gen_helper_gvec_fmul_idx_h,
3490 gen_helper_gvec_fmul_idx_s,
3491 gen_helper_gvec_fmul_idx_d,
3492 };
3493
3494 if (sve_access_check(s)) {
3495 unsigned vsz = vec_full_reg_size(s);
3496 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3497 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3498 vec_full_reg_offset(s, a->rn),
3499 vec_full_reg_offset(s, a->rm),
3500 status, vsz, vsz, a->index, fns[a->esz - 1]);
3501 tcg_temp_free_ptr(status);
3502 }
3503 return true;
3504}
3505
23fbe79f
RH
3506/*
3507 *** SVE Floating Point Fast Reduction Group
3508 */
3509
3510typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3511 TCGv_ptr, TCGv_i32);
3512
3513static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3514 gen_helper_fp_reduce *fn)
3515{
3516 unsigned vsz = vec_full_reg_size(s);
3517 unsigned p2vsz = pow2ceil(vsz);
3518 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3519 TCGv_ptr t_zn, t_pg, status;
3520 TCGv_i64 temp;
3521
3522 temp = tcg_temp_new_i64();
3523 t_zn = tcg_temp_new_ptr();
3524 t_pg = tcg_temp_new_ptr();
3525
3526 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3527 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3528 status = get_fpstatus_ptr(a->esz == MO_16);
3529
3530 fn(temp, t_zn, t_pg, status, t_desc);
3531 tcg_temp_free_ptr(t_zn);
3532 tcg_temp_free_ptr(t_pg);
3533 tcg_temp_free_ptr(status);
3534 tcg_temp_free_i32(t_desc);
3535
3536 write_fp_dreg(s, a->rd, temp);
3537 tcg_temp_free_i64(temp);
3538}
3539
3540#define DO_VPZ(NAME, name) \
3a7be554 3541static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
23fbe79f
RH
3542{ \
3543 static gen_helper_fp_reduce * const fns[3] = { \
3544 gen_helper_sve_##name##_h, \
3545 gen_helper_sve_##name##_s, \
3546 gen_helper_sve_##name##_d, \
3547 }; \
3548 if (a->esz == 0) { \
3549 return false; \
3550 } \
3551 if (sve_access_check(s)) { \
3552 do_reduce(s, a, fns[a->esz - 1]); \
3553 } \
3554 return true; \
3555}
3556
3557DO_VPZ(FADDV, faddv)
3558DO_VPZ(FMINNMV, fminnmv)
3559DO_VPZ(FMAXNMV, fmaxnmv)
3560DO_VPZ(FMINV, fminv)
3561DO_VPZ(FMAXV, fmaxv)
3562
3887c038
RH
3563/*
3564 *** SVE Floating Point Unary Operations - Unpredicated Group
3565 */
3566
3567static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3568{
3569 unsigned vsz = vec_full_reg_size(s);
3570 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3571
3572 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3573 vec_full_reg_offset(s, a->rn),
3574 status, vsz, vsz, 0, fn);
3575 tcg_temp_free_ptr(status);
3576}
3577
3a7be554 3578static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3579{
3580 static gen_helper_gvec_2_ptr * const fns[3] = {
3581 gen_helper_gvec_frecpe_h,
3582 gen_helper_gvec_frecpe_s,
3583 gen_helper_gvec_frecpe_d,
3584 };
3585 if (a->esz == 0) {
3586 return false;
3587 }
3588 if (sve_access_check(s)) {
3589 do_zz_fp(s, a, fns[a->esz - 1]);
3590 }
3591 return true;
3592}
3593
3a7be554 3594static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3887c038
RH
3595{
3596 static gen_helper_gvec_2_ptr * const fns[3] = {
3597 gen_helper_gvec_frsqrte_h,
3598 gen_helper_gvec_frsqrte_s,
3599 gen_helper_gvec_frsqrte_d,
3600 };
3601 if (a->esz == 0) {
3602 return false;
3603 }
3604 if (sve_access_check(s)) {
3605 do_zz_fp(s, a, fns[a->esz - 1]);
3606 }
3607 return true;
3608}
3609
4d2e2a03
RH
3610/*
3611 *** SVE Floating Point Compare with Zero Group
3612 */
3613
3614static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3615 gen_helper_gvec_3_ptr *fn)
3616{
3617 unsigned vsz = vec_full_reg_size(s);
3618 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3619
3620 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3621 vec_full_reg_offset(s, a->rn),
3622 pred_full_reg_offset(s, a->pg),
3623 status, vsz, vsz, 0, fn);
3624 tcg_temp_free_ptr(status);
3625}
3626
3627#define DO_PPZ(NAME, name) \
3a7be554 3628static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
4d2e2a03
RH
3629{ \
3630 static gen_helper_gvec_3_ptr * const fns[3] = { \
3631 gen_helper_sve_##name##_h, \
3632 gen_helper_sve_##name##_s, \
3633 gen_helper_sve_##name##_d, \
3634 }; \
3635 if (a->esz == 0) { \
3636 return false; \
3637 } \
3638 if (sve_access_check(s)) { \
3639 do_ppz_fp(s, a, fns[a->esz - 1]); \
3640 } \
3641 return true; \
3642}
3643
3644DO_PPZ(FCMGE_ppz0, fcmge0)
3645DO_PPZ(FCMGT_ppz0, fcmgt0)
3646DO_PPZ(FCMLE_ppz0, fcmle0)
3647DO_PPZ(FCMLT_ppz0, fcmlt0)
3648DO_PPZ(FCMEQ_ppz0, fcmeq0)
3649DO_PPZ(FCMNE_ppz0, fcmne0)
3650
3651#undef DO_PPZ
3652
67fcd9ad
RH
3653/*
3654 *** SVE floating-point trig multiply-add coefficient
3655 */
3656
3a7be554 3657static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
67fcd9ad
RH
3658{
3659 static gen_helper_gvec_3_ptr * const fns[3] = {
3660 gen_helper_sve_ftmad_h,
3661 gen_helper_sve_ftmad_s,
3662 gen_helper_sve_ftmad_d,
3663 };
3664
3665 if (a->esz == 0) {
3666 return false;
3667 }
3668 if (sve_access_check(s)) {
3669 unsigned vsz = vec_full_reg_size(s);
3670 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3671 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3672 vec_full_reg_offset(s, a->rn),
3673 vec_full_reg_offset(s, a->rm),
3674 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3675 tcg_temp_free_ptr(status);
3676 }
3677 return true;
3678}
3679
7f9ddf64
RH
3680/*
3681 *** SVE Floating Point Accumulating Reduction Group
3682 */
3683
3a7be554 3684static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
7f9ddf64
RH
3685{
3686 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3687 TCGv_ptr, TCGv_ptr, TCGv_i32);
3688 static fadda_fn * const fns[3] = {
3689 gen_helper_sve_fadda_h,
3690 gen_helper_sve_fadda_s,
3691 gen_helper_sve_fadda_d,
3692 };
3693 unsigned vsz = vec_full_reg_size(s);
3694 TCGv_ptr t_rm, t_pg, t_fpst;
3695 TCGv_i64 t_val;
3696 TCGv_i32 t_desc;
3697
3698 if (a->esz == 0) {
3699 return false;
3700 }
3701 if (!sve_access_check(s)) {
3702 return true;
3703 }
3704
3705 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3706 t_rm = tcg_temp_new_ptr();
3707 t_pg = tcg_temp_new_ptr();
3708 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3709 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3710 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3711 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3712
3713 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3714
3715 tcg_temp_free_i32(t_desc);
3716 tcg_temp_free_ptr(t_fpst);
3717 tcg_temp_free_ptr(t_pg);
3718 tcg_temp_free_ptr(t_rm);
3719
3720 write_fp_dreg(s, a->rd, t_val);
3721 tcg_temp_free_i64(t_val);
3722 return true;
3723}
3724
29b80469
RH
3725/*
3726 *** SVE Floating Point Arithmetic - Unpredicated Group
3727 */
3728
3729static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3730 gen_helper_gvec_3_ptr *fn)
3731{
3732 if (fn == NULL) {
3733 return false;
3734 }
3735 if (sve_access_check(s)) {
3736 unsigned vsz = vec_full_reg_size(s);
3737 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3738 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3739 vec_full_reg_offset(s, a->rn),
3740 vec_full_reg_offset(s, a->rm),
3741 status, vsz, vsz, 0, fn);
3742 tcg_temp_free_ptr(status);
3743 }
3744 return true;
3745}
3746
3747
3748#define DO_FP3(NAME, name) \
3a7be554 3749static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
29b80469
RH
3750{ \
3751 static gen_helper_gvec_3_ptr * const fns[4] = { \
3752 NULL, gen_helper_gvec_##name##_h, \
3753 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3754 }; \
3755 return do_zzz_fp(s, a, fns[a->esz]); \
3756}
3757
3758DO_FP3(FADD_zzz, fadd)
3759DO_FP3(FSUB_zzz, fsub)
3760DO_FP3(FMUL_zzz, fmul)
3761DO_FP3(FTSMUL, ftsmul)
3762DO_FP3(FRECPS, recps)
3763DO_FP3(FRSQRTS, rsqrts)
3764
3765#undef DO_FP3
3766
ec3b87c2
RH
3767/*
3768 *** SVE Floating Point Arithmetic - Predicated Group
3769 */
3770
3771static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3772 gen_helper_gvec_4_ptr *fn)
3773{
3774 if (fn == NULL) {
3775 return false;
3776 }
3777 if (sve_access_check(s)) {
3778 unsigned vsz = vec_full_reg_size(s);
3779 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3780 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3781 vec_full_reg_offset(s, a->rn),
3782 vec_full_reg_offset(s, a->rm),
3783 pred_full_reg_offset(s, a->pg),
3784 status, vsz, vsz, 0, fn);
3785 tcg_temp_free_ptr(status);
3786 }
3787 return true;
3788}
3789
3790#define DO_FP3(NAME, name) \
3a7be554 3791static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
ec3b87c2
RH
3792{ \
3793 static gen_helper_gvec_4_ptr * const fns[4] = { \
3794 NULL, gen_helper_sve_##name##_h, \
3795 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3796 }; \
3797 return do_zpzz_fp(s, a, fns[a->esz]); \
3798}
3799
3800DO_FP3(FADD_zpzz, fadd)
3801DO_FP3(FSUB_zpzz, fsub)
3802DO_FP3(FMUL_zpzz, fmul)
3803DO_FP3(FMIN_zpzz, fmin)
3804DO_FP3(FMAX_zpzz, fmax)
3805DO_FP3(FMINNM_zpzz, fminnum)
3806DO_FP3(FMAXNM_zpzz, fmaxnum)
3807DO_FP3(FABD, fabd)
3808DO_FP3(FSCALE, fscalbn)
3809DO_FP3(FDIV, fdiv)
3810DO_FP3(FMULX, fmulx)
3811
3812#undef DO_FP3
8092c6a3 3813
cc48affe
RH
3814typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3815 TCGv_i64, TCGv_ptr, TCGv_i32);
3816
3817static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3818 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3819{
3820 unsigned vsz = vec_full_reg_size(s);
3821 TCGv_ptr t_zd, t_zn, t_pg, status;
3822 TCGv_i32 desc;
3823
3824 t_zd = tcg_temp_new_ptr();
3825 t_zn = tcg_temp_new_ptr();
3826 t_pg = tcg_temp_new_ptr();
3827 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3828 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3829 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3830
3831 status = get_fpstatus_ptr(is_fp16);
3832 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3833 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3834
3835 tcg_temp_free_i32(desc);
3836 tcg_temp_free_ptr(status);
3837 tcg_temp_free_ptr(t_pg);
3838 tcg_temp_free_ptr(t_zn);
3839 tcg_temp_free_ptr(t_zd);
3840}
3841
3842static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3843 gen_helper_sve_fp2scalar *fn)
3844{
3845 TCGv_i64 temp = tcg_const_i64(imm);
3846 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3847 tcg_temp_free_i64(temp);
3848}
3849
3850#define DO_FP_IMM(NAME, name, const0, const1) \
3a7be554 3851static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
cc48affe
RH
3852{ \
3853 static gen_helper_sve_fp2scalar * const fns[3] = { \
3854 gen_helper_sve_##name##_h, \
3855 gen_helper_sve_##name##_s, \
3856 gen_helper_sve_##name##_d \
3857 }; \
3858 static uint64_t const val[3][2] = { \
3859 { float16_##const0, float16_##const1 }, \
3860 { float32_##const0, float32_##const1 }, \
3861 { float64_##const0, float64_##const1 }, \
3862 }; \
3863 if (a->esz == 0) { \
3864 return false; \
3865 } \
3866 if (sve_access_check(s)) { \
3867 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3868 } \
3869 return true; \
3870}
3871
3872#define float16_two make_float16(0x4000)
3873#define float32_two make_float32(0x40000000)
3874#define float64_two make_float64(0x4000000000000000ULL)
3875
3876DO_FP_IMM(FADD, fadds, half, one)
3877DO_FP_IMM(FSUB, fsubs, half, one)
3878DO_FP_IMM(FMUL, fmuls, half, two)
3879DO_FP_IMM(FSUBR, fsubrs, half, one)
3880DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3881DO_FP_IMM(FMINNM, fminnms, zero, one)
3882DO_FP_IMM(FMAX, fmaxs, zero, one)
3883DO_FP_IMM(FMIN, fmins, zero, one)
3884
3885#undef DO_FP_IMM
3886
abfdefd5
RH
3887static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888 gen_helper_gvec_4_ptr *fn)
3889{
3890 if (fn == NULL) {
3891 return false;
3892 }
3893 if (sve_access_check(s)) {
3894 unsigned vsz = vec_full_reg_size(s);
3895 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897 vec_full_reg_offset(s, a->rn),
3898 vec_full_reg_offset(s, a->rm),
3899 pred_full_reg_offset(s, a->pg),
3900 status, vsz, vsz, 0, fn);
3901 tcg_temp_free_ptr(status);
3902 }
3903 return true;
3904}
3905
3906#define DO_FPCMP(NAME, name) \
3a7be554 3907static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
abfdefd5
RH
3908{ \
3909 static gen_helper_gvec_4_ptr * const fns[4] = { \
3910 NULL, gen_helper_sve_##name##_h, \
3911 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3912 }; \
3913 return do_fp_cmp(s, a, fns[a->esz]); \
3914}
3915
3916DO_FPCMP(FCMGE, fcmge)
3917DO_FPCMP(FCMGT, fcmgt)
3918DO_FPCMP(FCMEQ, fcmeq)
3919DO_FPCMP(FCMNE, fcmne)
3920DO_FPCMP(FCMUO, fcmuo)
3921DO_FPCMP(FACGE, facge)
3922DO_FPCMP(FACGT, facgt)
3923
3924#undef DO_FPCMP
3925
3a7be554 3926static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
76a9d9cd
RH
3927{
3928 static gen_helper_gvec_4_ptr * const fns[3] = {
3929 gen_helper_sve_fcadd_h,
3930 gen_helper_sve_fcadd_s,
3931 gen_helper_sve_fcadd_d
3932 };
3933
3934 if (a->esz == 0) {
3935 return false;
3936 }
3937 if (sve_access_check(s)) {
3938 unsigned vsz = vec_full_reg_size(s);
3939 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3940 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3941 vec_full_reg_offset(s, a->rn),
3942 vec_full_reg_offset(s, a->rm),
3943 pred_full_reg_offset(s, a->pg),
3944 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3945 tcg_temp_free_ptr(status);
3946 }
3947 return true;
3948}
3949
6ceabaad
RH
3950typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3951
3952static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3953{
3954 if (fn == NULL) {
3955 return false;
3956 }
3957 if (!sve_access_check(s)) {
3958 return true;
3959 }
3960
3961 unsigned vsz = vec_full_reg_size(s);
3962 unsigned desc;
3963 TCGv_i32 t_desc;
3964 TCGv_ptr pg = tcg_temp_new_ptr();
3965
3966 /* We would need 7 operands to pass these arguments "properly".
3967 * So we encode all the register numbers into the descriptor.
3968 */
3969 desc = deposit32(a->rd, 5, 5, a->rn);
3970 desc = deposit32(desc, 10, 5, a->rm);
3971 desc = deposit32(desc, 15, 5, a->ra);
3972 desc = simd_desc(vsz, vsz, desc);
3973
3974 t_desc = tcg_const_i32(desc);
3975 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3976 fn(cpu_env, pg, t_desc);
3977 tcg_temp_free_i32(t_desc);
3978 tcg_temp_free_ptr(pg);
3979 return true;
3980}
3981
3982#define DO_FMLA(NAME, name) \
3a7be554 3983static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
6ceabaad
RH
3984{ \
3985 static gen_helper_sve_fmla * const fns[4] = { \
3986 NULL, gen_helper_sve_##name##_h, \
3987 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3988 }; \
3989 return do_fmla(s, a, fns[a->esz]); \
3990}
3991
3992DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3993DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3994DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3995DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3996
3997#undef DO_FMLA
3998
3a7be554 3999static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
05f48bab
RH
4000{
4001 static gen_helper_sve_fmla * const fns[3] = {
4002 gen_helper_sve_fcmla_zpzzz_h,
4003 gen_helper_sve_fcmla_zpzzz_s,
4004 gen_helper_sve_fcmla_zpzzz_d,
4005 };
4006
4007 if (a->esz == 0) {
4008 return false;
4009 }
4010 if (sve_access_check(s)) {
4011 unsigned vsz = vec_full_reg_size(s);
4012 unsigned desc;
4013 TCGv_i32 t_desc;
4014 TCGv_ptr pg = tcg_temp_new_ptr();
4015
4016 /* We would need 7 operands to pass these arguments "properly".
4017 * So we encode all the register numbers into the descriptor.
4018 */
4019 desc = deposit32(a->rd, 5, 5, a->rn);
4020 desc = deposit32(desc, 10, 5, a->rm);
4021 desc = deposit32(desc, 15, 5, a->ra);
4022 desc = deposit32(desc, 20, 2, a->rot);
4023 desc = sextract32(desc, 0, 22);
4024 desc = simd_desc(vsz, vsz, desc);
4025
4026 t_desc = tcg_const_i32(desc);
4027 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4028 fns[a->esz - 1](cpu_env, pg, t_desc);
4029 tcg_temp_free_i32(t_desc);
4030 tcg_temp_free_ptr(pg);
4031 }
4032 return true;
4033}
4034
3a7be554 4035static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
18fc2405
RH
4036{
4037 static gen_helper_gvec_3_ptr * const fns[2] = {
4038 gen_helper_gvec_fcmlah_idx,
4039 gen_helper_gvec_fcmlas_idx,
4040 };
4041
4042 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4043 tcg_debug_assert(a->rd == a->ra);
4044 if (sve_access_check(s)) {
4045 unsigned vsz = vec_full_reg_size(s);
4046 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4047 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4048 vec_full_reg_offset(s, a->rn),
4049 vec_full_reg_offset(s, a->rm),
4050 status, vsz, vsz,
4051 a->index * 4 + a->rot,
4052 fns[a->esz - 1]);
4053 tcg_temp_free_ptr(status);
4054 }
4055 return true;
4056}
4057
8092c6a3
RH
4058/*
4059 *** SVE Floating Point Unary Operations Predicated Group
4060 */
4061
4062static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4063 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4064{
4065 if (sve_access_check(s)) {
4066 unsigned vsz = vec_full_reg_size(s);
4067 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4068 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4069 vec_full_reg_offset(s, rn),
4070 pred_full_reg_offset(s, pg),
4071 status, vsz, vsz, 0, fn);
4072 tcg_temp_free_ptr(status);
4073 }
4074 return true;
4075}
4076
3a7be554 4077static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4078{
e4ab5124 4079 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
46d33d1e
RH
4080}
4081
3a7be554 4082static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4083{
4084 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4085}
4086
3a7be554 4087static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
46d33d1e 4088{
e4ab5124 4089 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
46d33d1e
RH
4090}
4091
3a7be554 4092static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4093{
4094 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4095}
4096
3a7be554 4097static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4098{
4099 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4100}
4101
3a7be554 4102static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
46d33d1e
RH
4103{
4104 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4105}
4106
3a7be554 4107static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4108{
4109 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4110}
4111
3a7be554 4112static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4113{
4114 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4115}
4116
3a7be554 4117static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4118{
4119 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4120}
4121
3a7be554 4122static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4123{
4124 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4125}
4126
3a7be554 4127static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4128{
4129 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4130}
4131
3a7be554 4132static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4133{
4134 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4135}
4136
3a7be554 4137static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4138{
4139 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4140}
4141
3a7be554 4142static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4143{
4144 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4145}
4146
3a7be554 4147static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4148{
4149 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4150}
4151
3a7be554 4152static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4153{
4154 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4155}
4156
3a7be554 4157static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4158{
4159 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4160}
4161
3a7be554 4162static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4163{
4164 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4165}
4166
3a7be554 4167static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4168{
4169 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4170}
4171
3a7be554 4172static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
df4de1af
RH
4173{
4174 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4175}
4176
cda3c753
RH
4177static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4178 gen_helper_sve_frint_h,
4179 gen_helper_sve_frint_s,
4180 gen_helper_sve_frint_d
4181};
4182
3a7be554 4183static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4184{
4185 if (a->esz == 0) {
4186 return false;
4187 }
4188 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4189 frint_fns[a->esz - 1]);
4190}
4191
3a7be554 4192static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4193{
4194 static gen_helper_gvec_3_ptr * const fns[3] = {
4195 gen_helper_sve_frintx_h,
4196 gen_helper_sve_frintx_s,
4197 gen_helper_sve_frintx_d
4198 };
4199 if (a->esz == 0) {
4200 return false;
4201 }
4202 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4203}
4204
4205static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4206{
4207 if (a->esz == 0) {
4208 return false;
4209 }
4210 if (sve_access_check(s)) {
4211 unsigned vsz = vec_full_reg_size(s);
4212 TCGv_i32 tmode = tcg_const_i32(mode);
4213 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4214
4215 gen_helper_set_rmode(tmode, tmode, status);
4216
4217 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4218 vec_full_reg_offset(s, a->rn),
4219 pred_full_reg_offset(s, a->pg),
4220 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4221
4222 gen_helper_set_rmode(tmode, tmode, status);
4223 tcg_temp_free_i32(tmode);
4224 tcg_temp_free_ptr(status);
4225 }
4226 return true;
4227}
4228
3a7be554 4229static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4230{
4231 return do_frint_mode(s, a, float_round_nearest_even);
4232}
4233
3a7be554 4234static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4235{
4236 return do_frint_mode(s, a, float_round_up);
4237}
4238
3a7be554 4239static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4240{
4241 return do_frint_mode(s, a, float_round_down);
4242}
4243
3a7be554 4244static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4245{
4246 return do_frint_mode(s, a, float_round_to_zero);
4247}
4248
3a7be554 4249static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
cda3c753
RH
4250{
4251 return do_frint_mode(s, a, float_round_ties_away);
4252}
4253
3a7be554 4254static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4255{
4256 static gen_helper_gvec_3_ptr * const fns[3] = {
4257 gen_helper_sve_frecpx_h,
4258 gen_helper_sve_frecpx_s,
4259 gen_helper_sve_frecpx_d
4260 };
4261 if (a->esz == 0) {
4262 return false;
4263 }
4264 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4265}
4266
3a7be554 4267static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
ec5b375b
RH
4268{
4269 static gen_helper_gvec_3_ptr * const fns[3] = {
4270 gen_helper_sve_fsqrt_h,
4271 gen_helper_sve_fsqrt_s,
4272 gen_helper_sve_fsqrt_d
4273 };
4274 if (a->esz == 0) {
4275 return false;
4276 }
4277 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4278}
4279
3a7be554 4280static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4281{
4282 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4283}
4284
3a7be554 4285static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4286{
4287 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4288}
4289
3a7be554 4290static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4291{
4292 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4293}
4294
3a7be554 4295static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4296{
4297 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4298}
4299
3a7be554 4300static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4301{
4302 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4303}
4304
3a7be554 4305static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4306{
4307 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4308}
4309
3a7be554 4310static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4311{
4312 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4313}
4314
3a7be554 4315static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4316{
4317 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4318}
4319
3a7be554 4320static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4321{
4322 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4323}
4324
3a7be554 4325static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4326{
4327 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4328}
4329
3a7be554 4330static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4331{
4332 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4333}
4334
3a7be554 4335static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4336{
4337 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4338}
4339
3a7be554 4340static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4341{
4342 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4343}
4344
3a7be554 4345static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
8092c6a3
RH
4346{
4347 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4348}
4349
d1822297
RH
4350/*
4351 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4352 */
4353
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 *
 * VOFS is the cpu_env offset of the destination register/predicate
 * storage; LEN may be any multiple of 2 (predicate loads) or 8
 * (vector loads).  Emits TCG code; does not itself access memory.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte pieces, counting a sub-8-byte tail as one part. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: fully unroll the 8-byte loads. */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Emit a TCG-level loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) turns the byte count into a MO_SIZE. */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* No 6-byte access: compose it from a 4-byte + 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4441
/* Similarly for stores: write LEN bytes starting at cpu_env offset
 * VOFS to the address Rn + IMM.  Mirror image of do_ldr above.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte pieces, counting a sub-8-byte tail as one part. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: fully unroll the 8-byte stores. */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Emit a TCG-level loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) turns the byte count into a MO_SIZE. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* No 6-byte access: split into a 4-byte + 2-byte store. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4523
3a7be554 4524static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
d1822297
RH
4525{
4526 if (sve_access_check(s)) {
4527 int size = vec_full_reg_size(s);
4528 int off = vec_full_reg_offset(s, a->rd);
4529 do_ldr(s, off, size, a->rn, a->imm * size);
4530 }
4531 return true;
4532}
4533
3a7be554 4534static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
d1822297
RH
4535{
4536 if (sve_access_check(s)) {
4537 int size = pred_full_reg_size(s);
4538 int off = pred_full_reg_offset(s, a->rd);
4539 do_ldr(s, off, size, a->rn, a->imm * size);
4540 }
4541 return true;
4542}
c4e7c493 4543
3a7be554 4544static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5047c204
RH
4545{
4546 if (sve_access_check(s)) {
4547 int size = vec_full_reg_size(s);
4548 int off = vec_full_reg_offset(s, a->rd);
4549 do_str(s, off, size, a->rn, a->imm * size);
4550 }
4551 return true;
4552}
4553
3a7be554 4554static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5047c204
RH
4555{
4556 if (sve_access_check(s)) {
4557 int size = pred_full_reg_size(s);
4558 int off = pred_full_reg_offset(s, a->rd);
4559 do_str(s, off, size, a->rn, a->imm * size);
4560 }
4561 return true;
4562}
4563
c4e7c493
RH
4564/*
4565 *** SVE Memory - Contiguous Load Group
4566 */
4567
/* The memory mode of the dtype: the MemOp (size and sign extension)
 * used when accessing memory, indexed by the 4-bit dtype field of the
 * contiguous load encodings.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The memory element size (log2 bytes) of the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4577
/* The vector element size of dtype (log2 bytes); may be wider than
 * the memory element size for the extending load forms.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4585
500d0484
RH
/* Build a TCGMemOpIdx combining guest endianness, the memory size/sign
 * for DTYPE, and the current mmu index; packed into the helper desc.
 */
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
4590
/* Invoke the out-of-line helper FN for a predicated contiguous
 * load/store: register group starting at ZT, governed by predicate PG,
 * base address ADDR.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = sve_memopidx(s, dtype);
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
4615
/* Expand a predicated contiguous load LD1..LD4.
 * NREG is the register count minus one (0 => LD1); the helper table is
 * indexed by [big-endian][dtype][nreg].
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
4678
3a7be554 4679static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
c4e7c493
RH
4680{
4681 if (a->rm == 31) {
4682 return false;
4683 }
4684 if (sve_access_check(s)) {
4685 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 4686 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
c4e7c493
RH
4687 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4688 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4689 }
4690 return true;
4691}
4692
3a7be554 4693static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
c4e7c493
RH
4694{
4695 if (sve_access_check(s)) {
4696 int vsz = vec_full_reg_size(s);
4697 int elements = vsz >> dtype_esz[a->dtype];
4698 TCGv_i64 addr = new_tmp_a64(s);
4699
4700 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4701 (a->imm * elements * (a->nreg + 1))
4702 << dtype_msz(a->dtype));
4703 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4704 }
4705 return true;
4706}
e2654d75 4707
/* LDFF1 (first-fault, scalar plus scalar).
 * Helper table indexed by [big-endian][dtype].
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        /* Address is Rn + (Rm << msz), as for the normal LD1. */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4763
/* LDNF1 (non-fault, scalar plus immediate).
 * Helper table indexed by [big-endian][dtype].
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* Immediate is scaled by the number of elements per register. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
1a039c7e 4822
05abe304
RH
/* Expand LD1RQ: load one 16-byte quadword under predicate PG and
 * replicate it to fill the whole vector register ZT.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    /* LD1 helpers indexed by [big-endian][msz]; byte form is shared. */
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.
     * Note the desc claims a 16-byte vector (simd_desc(16, 16, ...)),
     * i.e. VQ lowered to 1 for this one operation.
     */
    desc = sve_memopidx(s, msz_dtype(s, msz));
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* The 16 predicate bits sit in the high half of the 64-bit
         * unit on a big-endian host; adjust the byte offset.
         */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Park the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4875
3a7be554 4876static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
05abe304
RH
4877{
4878 if (a->rm == 31) {
4879 return false;
4880 }
4881 if (sve_access_check(s)) {
4882 int msz = dtype_msz(a->dtype);
4883 TCGv_i64 addr = new_tmp_a64(s);
4884 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4885 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4886 do_ldrq(s, a->rd, a->pg, addr, msz);
4887 }
4888 return true;
4889}
4890
3a7be554 4891static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
05abe304
RH
4892{
4893 if (sve_access_check(s)) {
4894 TCGv_i64 addr = new_tmp_a64(s);
4895 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4896 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4897 }
4898 return true;
4899}
4900
/* Load and broadcast element (LD1R): load one element and replicate
 * it to every element of ZD, zeroing the inactive elements afterward.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: use the generic search helper;
         * a negative result means no element is active.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4949
1a039c7e
RH
/* Expand a predicated contiguous store ST1..ST4.
 * NREG is the register count minus one (0 => ST1).
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [big-endian][msz][esz]; NULL entries are
     * msz > esz combinations rejected by the callers below.
     */
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [big-endian][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
}
5021
3a7be554 5022static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
1a039c7e
RH
5023{
5024 if (a->rm == 31 || a->msz > a->esz) {
5025 return false;
5026 }
5027 if (sve_access_check(s)) {
5028 TCGv_i64 addr = new_tmp_a64(s);
50ef1cbf 5029 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
1a039c7e
RH
5030 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5031 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5032 }
5033 return true;
5034}
5035
3a7be554 5036static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
1a039c7e
RH
5037{
5038 if (a->msz > a->esz) {
5039 return false;
5040 }
5041 if (sve_access_check(s)) {
5042 int vsz = vec_full_reg_size(s);
5043 int elements = vsz >> a->esz;
5044 TCGv_i64 addr = new_tmp_a64(s);
5045
5046 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5047 (a->imm * elements * (a->nreg + 1)) << a->msz);
5048 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5049 }
5050 return true;
5051}
f6dbf62a
RH
5052
5053/*
5054 *** SVE gather loads / scatter stores
5055 */
5056
500d0484
RH
/* Invoke the gather/scatter helper FN: vector register ZT, predicate
 * PG, vector of offsets ZM, plus a SCALAR base; SCALE (shift of the
 * offsets) is packed into the descriptor.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(s, msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
5083
d4f75f25
RH
/* Indexed by [be][ff][xs][u][msz].
 * 32-bit element gather loads.  NULL slots are encodings with no
 * helper; they are unreachable via decode (see the assert in
 * trans_LD1_zprz).
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5142
/* Note that we overload xs=2 to indicate 64-bit offset.
 * 64-bit element gather loads, indexed by [be][ff][xs][u][msz];
 * NULL slots are unreachable via decode (asserted by the callers).
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5249
3a7be554 5250static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
673e9fa6
RH
5251{
5252 gen_helper_gvec_mem_scatter *fn = NULL;
d4f75f25 5253 int be = s->be_data == MO_BE;
673e9fa6
RH
5254
5255 if (!sve_access_check(s)) {
5256 return true;
5257 }
5258
5259 switch (a->esz) {
5260 case MO_32:
d4f75f25 5261 fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5262 break;
5263 case MO_64:
d4f75f25 5264 fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
673e9fa6
RH
5265 break;
5266 }
5267 assert(fn != NULL);
5268
5269 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
500d0484 5270 cpu_reg_sp(s, a->rn), a->msz, fn);
673e9fa6
RH
5271 return true;
5272}
5273
/* LD1 (gather, vector base plus immediate offset). */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    /* Reject element narrower than memory access, and same-size
     * sign-extending forms (no such encodings).
     */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* xs index: 0 for 32-bit elements; 2 (the overloaded 64-bit
     * offset slot) for 64-bit elements.
     */
    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}
5305
78cf1b88
RH
/* Indexed by [be][xs][msz].  32-bit element scatter stores; the byte
 * form is endian-agnostic and shared between both halves.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5323
/* Note that we overload xs=2 to indicate 64-bit offset.
 * 64-bit element scatter stores, indexed by [be][xs][msz].
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5353
3a7be554 5354static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
f6dbf62a 5355{
f6dbf62a 5356 gen_helper_gvec_mem_scatter *fn;
78cf1b88 5357 int be = s->be_data == MO_BE;
f6dbf62a
RH
5358
5359 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5360 return false;
5361 }
5362 if (!sve_access_check(s)) {
5363 return true;
5364 }
5365 switch (a->esz) {
5366 case MO_32:
78cf1b88 5367 fn = scatter_store_fn32[be][a->xs][a->msz];
f6dbf62a
RH
5368 break;
5369 case MO_64:
78cf1b88 5370 fn = scatter_store_fn64[be][a->xs][a->msz];
f6dbf62a
RH
5371 break;
5372 default:
5373 g_assert_not_reached();
5374 }
5375 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
500d0484 5376 cpu_reg_sp(s, a->rn), a->msz, fn);
f6dbf62a
RH
5377 return true;
5378}
dec6cf6b 5379
3a7be554 5380static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
408ecde9
RH
5381{
5382 gen_helper_gvec_mem_scatter *fn = NULL;
78cf1b88 5383 int be = s->be_data == MO_BE;
408ecde9
RH
5384 TCGv_i64 imm;
5385
5386 if (a->esz < a->msz) {
5387 return false;
5388 }
5389 if (!sve_access_check(s)) {
5390 return true;
5391 }
5392
5393 switch (a->esz) {
5394 case MO_32:
78cf1b88 5395 fn = scatter_store_fn32[be][0][a->msz];
408ecde9
RH
5396 break;
5397 case MO_64:
78cf1b88 5398 fn = scatter_store_fn64[be][2][a->msz];
408ecde9
RH
5399 break;
5400 }
5401 assert(fn != NULL);
5402
5403 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5404 * by loading the immediate into the scalar parameter.
5405 */
5406 imm = tcg_const_i64(a->imm << a->msz);
500d0484 5407 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
408ecde9
RH
5408 tcg_temp_free_i64(imm);
5409 return true;
5410}
5411
dec6cf6b
RH
5412/*
5413 * Prefetches
5414 */
5415
3a7be554 5416static bool trans_PRF(DisasContext *s, arg_PRF *a)
dec6cf6b
RH
5417{
5418 /* Prefetch is a nop within QEMU. */
2f95a3b0 5419 (void)sve_access_check(s);
dec6cf6b
RH
5420 return true;
5421}
5422
3a7be554 5423static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
dec6cf6b
RH
5424{
5425 if (a->rm == 31) {
5426 return false;
5427 }
5428 /* Prefetch is a nop within QEMU. */
2f95a3b0 5429 (void)sve_access_check(s);
dec6cf6b
RH
5430 return true;
5431}
a2103582
RH
5432
5433/*
5434 * Move Prefix
5435 *
5436 * TODO: The implementation so far could handle predicated merging movprfx.
5437 * The helper functions as written take an extra source register to
5438 * use in the operation, but the result is only written when predication
5439 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5440 * to allow the final write back to the destination to be unconditional.
5441 * For predicated zeroing movprfx, we need to rearrange the helpers to
5442 * allow the final write back to zero inactives.
5443 *
5444 * In the meantime, just emit the moves.
5445 */
5446
3a7be554 5447static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
a2103582
RH
5448{
5449 return do_mov_z(s, a->rd, a->rn);
5450}
5451
3a7be554 5452static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5453{
5454 if (sve_access_check(s)) {
5455 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5456 }
5457 return true;
5458}
5459
3a7be554 5460static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
a2103582
RH
5461{
5462 if (sve_access_check(s)) {
5463 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5464 }
5465 return true;
5466}