/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

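/* A worked example (illustration only, not from the original source):
 * for a byte element (esz == 0) the tsz field is 0b0001, so the
 * combined tsz:imm3 value is x == 8 + imm3.  tszimm_shr() then gives
 * (16 << 0) - x == 8 - imm3, i.e. right shifts of 8 down to 1, and
 * tszimm_shl() gives x - (8 << 0) == imm3, i.e. left shifts of 0 to 7.
 */
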
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8. Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

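/* For example (illustration only): with the 9-bit field x == 0x1ab,
 * SH is set, so expand_imm_sh8s() yields (int8_t)0xab << 8 == -0x5500
 * (sign-extended), while expand_imm_sh8u() yields 0xab << 8 == 0xab00.
 */
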
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data. Cf. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

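/* In other words (illustration only), dtype == msz * 5, i.e. msz
 * replicated into both two-bit halves of dtype: e.g. msz_dtype(2) == 10
 * == 0b1010, the dtype used for an unsigned 32-bit contiguous load.
 */
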
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure. Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

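/* For example (illustration only): with a 384-bit vector length,
 * pred_full_reg_size() is 384 / 64 == 6 bytes, which size_for_gvec()
 * rounds up to 8; with a 2048-bit VL it is 32 bytes, already a
 * multiple of 16, so it is returned unchanged.
 */
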
/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

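/* That is (noted for illustration): the helper packs the predicate
 * test result as N in bit 31, "not Z" in bit 1 and C in bit 0, with V
 * always clear, which matches how cpu_NF/ZF/CF/VF are interpreted.
 */
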
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

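/* One predicate bit covers one byte of vector, so an element of size
 * 1 << esz owns a group of 1 << esz predicate bits of which only the
 * lowest is significant (illustration only): e.g. for esz == 2 (.S)
 * every fourth bit is active, giving the 0x1111... pattern above.
 */
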
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,        \
                                uint32_t insn)                           \
{                                                                        \
    static gen_helper_gvec_4 * const fns[4] = {                          \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,  \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,  \
    };                                                                   \
    return do_zpzz_ool(s, a, fns[a->esz]);                               \
}

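/* For illustration only: DO_ZPZZ(AND, and) expands to a trans_AND_zpzz
 * function that selects one of the out-of-line helpers
 * gen_helper_sve_and_zpzz_{b,h,s,d} by element size and passes it to
 * do_zpzz_ool above.
 */
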
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3 * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_zpz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_reduc * const fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_vpz_ool(s, a, fns[a->esz]);                         \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd. We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,        \
                                uint32_t insn)                           \
{                                                                        \
    static gen_helper_gvec_4 * const fns[3] = {                          \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,  \
        gen_helper_sve_##name##_zpzw_s,                                  \
    };                                                                   \
    if (a->esz < 0 || a->esz >= 3) {                                     \
        return false;                                                    \
    }                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

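/* For example (illustration only): LSR Zd.B, Zn.B, #8 shifts every
 * byte element by its full width, so the result is all zeros and a
 * single dup of zero suffices; ASR Zd.B, Zn.B, #8 instead behaves as
 * a shift by 7, which replicates each element's sign bit.
 */
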
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,          \
                               uint32_t insn)                            \
{                                                                        \
    static gen_helper_gvec_3 * const fns[4] = {                          \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,    \
        gen_helper_sve_##name##_zzw_s, NULL                              \
    };                                                                   \
    return do_zzw_ool(s, a, fns[a->esz]);                                \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_5 * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_zpzzz_ool(s, a, fns[a->esz]);                       \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

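/* For example (illustration only): INDEX Zd.S, #1, #2 fills element
 * n of Zd with 1 + 2 * n, i.e. { 1, 3, 5, 7, ... } across the vector.
 */
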
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

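/* The special cases above are worth a note (illustration only):
 * AND Pd.B, Pg/Z, Pn.B, Pn.B with Pg == Pn is simply a copy of Pn,
 * so it takes the do_mov_p path; when the governing predicate equals
 * one operand, the AND with Pg is redundant and a plain three-operand
 * gvec AND suffices.
 */
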
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

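/* A worked example (illustration only): with a 384-bit vector and
 * esz == 2 there are 12 word elements, so POW2 yields 8, VL8 yields 8,
 * MUL4 yields 12, ALL yields 12, and VL16 yields 0 because the vector
 * cannot hold 16 such elements.
 */
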
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

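/* A worked example (illustration only): PTRUE Pd.H, VL5 on a 256-bit
 * vector gives numelem == 5, so setsz == 10 predicate bits and
 * lastword == 0x5555555555555555ull & 0x3ff == 0x155, i.e. five
 * active .H elements with every second bit significant.
 */
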
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

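/* For example (illustration only): an unsigned increment of reg ==
 * 0xfffffff0 by val == 0x20 computes 0x1_0000_0010 in 64-bit
 * arithmetic, which is greater than UINT32_MAX, so the movcond clamps
 * the result to 0xffffffff.
 */
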
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t0);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

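/* The signed cases use the usual XOR trick (noted for illustration):
 * for addition, overflow occurred iff the operands had the same sign
 * and the result's sign differs, i.e. (res ^ val) & ~(reg ^ val) has
 * its top bit set; because val is a positive element count, only the
 * INT64_MAX (add) or INT64_MIN (subtract) bound can be reached.
 */
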
/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

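/* For example (illustration only, following the usual AArch64 FMOV
 * immediate encoding): the 8-bit immediate 0x70 expands to 1.0 in the
 * selected precision, so an FCPY of #1.0 into Zd.D copies the bit
 * pattern 0x3ff0000000000000 into each active element.
 */
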
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

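/* For example (illustration only): with a 32-byte vector, EXT with
 * imm == 16 has n_ofs == n_siz == 16, both acceptable gvec sizes, so
 * the result is built from two simple moves; imm == 5 leaves n_ofs
 * unaligned and falls through to the out-of-line helper.
 */
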
30562ab7
RH
2002/*
2003 *** SVE Permute - Unpredicated Group
2004 */
2005
2006static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2007{
2008 if (sve_access_check(s)) {
2009 unsigned vsz = vec_full_reg_size(s);
2010 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2011 vsz, vsz, cpu_reg_sp(s, a->rn));
2012 }
2013 return true;
2014}
2015
2016static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2017{
2018 if ((a->imm & 0x1f) == 0) {
2019 return false;
2020 }
2021 if (sve_access_check(s)) {
2022 unsigned vsz = vec_full_reg_size(s);
2023 unsigned dofs = vec_full_reg_offset(s, a->rd);
2024 unsigned esz, index;
2025
2026 esz = ctz32(a->imm);
2027 index = a->imm >> (esz + 1);
2028
2029 if ((index << esz) < vsz) {
2030 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2031 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2032 } else {
2033 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2034 }
2035 }
2036 return true;
2037}
2038
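/* DUP (indexed) packs the element size and index into one immediate:
 * the lowest set bit selects esz and the bits above it form the index.
 * E.g. imm = 0b1010 gives ctz32 = 1, so esz = MO_16 and
 * index = imm >> 2 = 2, i.e. DUP Zd.H, Zn.H[2].  An index beyond the
 * current vector length produces zero, as the architecture requires.
 */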
2039static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2040{
2041 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2042 static gen_insr * const fns[4] = {
2043 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2044 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2045 };
2046 unsigned vsz = vec_full_reg_size(s);
2047 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2048 TCGv_ptr t_zd = tcg_temp_new_ptr();
2049 TCGv_ptr t_zn = tcg_temp_new_ptr();
2050
2051 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2052 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2053
2054 fns[a->esz](t_zd, t_zn, val, desc);
2055
2056 tcg_temp_free_ptr(t_zd);
2057 tcg_temp_free_ptr(t_zn);
2058 tcg_temp_free_i32(desc);
2059}
2060
2061static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2062{
2063 if (sve_access_check(s)) {
2064 TCGv_i64 t = tcg_temp_new_i64();
2065 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2066 do_insr_i64(s, a, t);
2067 tcg_temp_free_i64(t);
2068 }
2069 return true;
2070}
2071
2072static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2073{
2074 if (sve_access_check(s)) {
2075 do_insr_i64(s, a, cpu_reg(s, a->rm));
2076 }
2077 return true;
2078}
2079
2080static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2081{
2082 static gen_helper_gvec_2 * const fns[4] = {
2083 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2084 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2085 };
2086
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vsz, vsz, 0, fns[a->esz]);
2092 }
2093 return true;
2094}
2095
2096static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097{
2098 static gen_helper_gvec_3 * const fns[4] = {
2099 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2100 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2101 };
2102
2103 if (sve_access_check(s)) {
2104 unsigned vsz = vec_full_reg_size(s);
2105 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2106 vec_full_reg_offset(s, a->rn),
2107 vec_full_reg_offset(s, a->rm),
2108 vsz, vsz, 0, fns[a->esz]);
2109 }
2110 return true;
2111}
2112
2113static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2114{
2115 static gen_helper_gvec_2 * const fns[4][2] = {
2116 { NULL, NULL },
2117 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2118 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2119 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2120 };
2121
2122 if (a->esz == 0) {
2123 return false;
2124 }
2125 if (sve_access_check(s)) {
2126 unsigned vsz = vec_full_reg_size(s);
2127 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2128 vec_full_reg_offset(s, a->rn)
2129 + (a->h ? vsz / 2 : 0),
2130 vsz, vsz, 0, fns[a->esz][a->u]);
2131 }
2132 return true;
2133}
2134
d731d8cb
RH
2135/*
2136 *** SVE Permute - Predicates Group
2137 */
2138
2139static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2140 gen_helper_gvec_3 *fn)
2141{
2142 if (!sve_access_check(s)) {
2143 return true;
2144 }
2145
2146 unsigned vsz = pred_full_reg_size(s);
2147
2148    /* Predicate sizes need not be a multiple of the 8-byte unit that
2149       simd_desc encodes.  We cannot round up, as we do elsewhere,
2150       because we need the exact size for ZIP2 and REV.  We keep the
2151       same descriptor style as the other helpers for consistency. */
2152 TCGv_ptr t_d = tcg_temp_new_ptr();
2153 TCGv_ptr t_n = tcg_temp_new_ptr();
2154 TCGv_ptr t_m = tcg_temp_new_ptr();
2155 TCGv_i32 t_desc;
2156 int desc;
2157
2158 desc = vsz - 2;
2159 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2160 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2161
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2165 t_desc = tcg_const_i32(desc);
2166
2167 fn(t_d, t_n, t_m, t_desc);
2168
2169 tcg_temp_free_ptr(t_d);
2170 tcg_temp_free_ptr(t_n);
2171 tcg_temp_free_ptr(t_m);
2172 tcg_temp_free_i32(t_desc);
2173 return true;
2174}
2175
2176static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2177 gen_helper_gvec_2 *fn)
2178{
2179 if (!sve_access_check(s)) {
2180 return true;
2181 }
2182
2183 unsigned vsz = pred_full_reg_size(s);
2184 TCGv_ptr t_d = tcg_temp_new_ptr();
2185 TCGv_ptr t_n = tcg_temp_new_ptr();
2186 TCGv_i32 t_desc;
2187 int desc;
2188
2189 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2190 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2191
2192    /* Predicate sizes need not be a multiple of the 8-byte unit that
2193       simd_desc encodes.  We cannot round up, as we do elsewhere,
2194       because we need the exact size for ZIP2 and REV.  We keep the
2195       same descriptor style as the other helpers for consistency. */
2196
2197 desc = vsz - 2;
2198 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2199 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2200 t_desc = tcg_const_i32(desc);
2201
2202 fn(t_d, t_n, t_desc);
2203
2204 tcg_temp_free_i32(t_desc);
2205 tcg_temp_free_ptr(t_d);
2206 tcg_temp_free_ptr(t_n);
2207 return true;
2208}
2209
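/* Sketch of the hand-built descriptor used by do_perm_pred3 and
 * do_perm_pred2 above, assuming the usual tcg-gvec-desc field layout:
 *   bits [SIMD_DATA_SHIFT-1:0]                 : psz - 2
 *   bits [SIMD_DATA_SHIFT+1:SIMD_DATA_SHIFT]   : esz
 *   bits [SIMD_DATA_SHIFT+3:SIMD_DATA_SHIFT+2] : high_odd
 * The helpers recover the exact predicate size from the low bits,
 * rather than the 8-byte-quantized size simd_desc would have encoded.
 */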
2210static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2211{
2212 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2213}
2214
2215static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2216{
2217 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2218}
2219
2220static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2221{
2222 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2223}
2224
2225static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2226{
2227 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2228}
2229
2230static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2231{
2232 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2233}
2234
2235static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2236{
2237 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2238}
2239
2240static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2241{
2242 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2243}
2244
2245static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2246{
2247 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2248}
2249
2250static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2251{
2252 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2253}
2254
234b48e9
RH
2255/*
2256 *** SVE Permute - Interleaving Group
2257 */
2258
2259static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2260{
2261 static gen_helper_gvec_3 * const fns[4] = {
2262 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2263 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2264 };
2265
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned high_ofs = high ? vsz / 2 : 0;
2269 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2270 vec_full_reg_offset(s, a->rn) + high_ofs,
2271 vec_full_reg_offset(s, a->rm) + high_ofs,
2272 vsz, vsz, 0, fns[a->esz]);
2273 }
2274 return true;
2275}
2276
2277static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2278 gen_helper_gvec_3 *fn)
2279{
2280 if (sve_access_check(s)) {
2281 unsigned vsz = vec_full_reg_size(s);
2282 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2283 vec_full_reg_offset(s, a->rn),
2284 vec_full_reg_offset(s, a->rm),
2285 vsz, vsz, data, fn);
2286 }
2287 return true;
2288}
2289
2290static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2291{
2292 return do_zip(s, a, false);
2293}
2294
2295static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2296{
2297 return do_zip(s, a, true);
2298}
2299
2300static gen_helper_gvec_3 * const uzp_fns[4] = {
2301 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2302 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2303};
2304
2305static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2306{
2307 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2308}
2309
2310static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2311{
2312 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2313}
2314
2315static gen_helper_gvec_3 * const trn_fns[4] = {
2316 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2317 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2318};
2319
2320static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2321{
2322 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2323}
2324
2325static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2326{
2327 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2328}
2329
3ca879ae
RH
2330/*
2331 *** SVE Permute Vector - Predicated Group
2332 */
2333
2334static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2335{
2336 static gen_helper_gvec_3 * const fns[4] = {
2337 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2338 };
2339 return do_zpz_ool(s, a, fns[a->esz]);
2340}
2341
ef23cb72
RH
2342/* Call the helper that computes the ARM LastActiveElement pseudocode
2343 * function, scaled by the element size. This includes the not found
2344 * indication; e.g. not found for esz=3 is -8.
2345 */
2346static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2347{
2348    /* Predicate sizes need not be a multiple of the 8-byte unit that
2349     * simd_desc encodes; we cannot round up, as we do elsewhere, because we need the exact size.
2350     */
2351 TCGv_ptr t_p = tcg_temp_new_ptr();
2352 TCGv_i32 t_desc;
2353 unsigned vsz = pred_full_reg_size(s);
2354 unsigned desc;
2355
2356 desc = vsz - 2;
2357 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2358
2359 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2360 t_desc = tcg_const_i32(desc);
2361
2362 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2363
2364 tcg_temp_free_i32(t_desc);
2365 tcg_temp_free_ptr(t_p);
2366}
2367
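/* Worked example: for esz = MO_32, if the last active element is
 * element 3, the helper returns 3 << 2 = 12, a byte offset into the
 * vector; if no element is active it returns -(1 << 2) = -4.
 */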
2368/* Increment LAST to the offset of the next element in the vector,
2369 * wrapping around to 0.
2370 */
2371static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2372{
2373 unsigned vsz = vec_full_reg_size(s);
2374
2375 tcg_gen_addi_i32(last, last, 1 << esz);
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz);
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2384 }
2385}
2386
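/* Wrap example for a non-power-of-2 vector: with vsz = 48 and
 * esz = MO_32, incrementing last = 44 yields 48, which the movcond
 * above folds back to 0; power-of-2 sizes need only the AND mask.
 */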
2387/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2388static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2389{
2390 unsigned vsz = vec_full_reg_size(s);
2391
2392 if (is_power_of_2(vsz)) {
2393 tcg_gen_andi_i32(last, last, vsz - 1);
2394 } else {
2395 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2396 TCGv_i32 zero = tcg_const_i32(0);
2397 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2398 tcg_temp_free_i32(max);
2399 tcg_temp_free_i32(zero);
2400 }
2401}
2402
2403/* Load an unsigned element of ESZ from BASE+OFS. */
2404static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2405{
2406 TCGv_i64 r = tcg_temp_new_i64();
2407
2408 switch (esz) {
2409 case 0:
2410 tcg_gen_ld8u_i64(r, base, ofs);
2411 break;
2412 case 1:
2413 tcg_gen_ld16u_i64(r, base, ofs);
2414 break;
2415 case 2:
2416 tcg_gen_ld32u_i64(r, base, ofs);
2417 break;
2418 case 3:
2419 tcg_gen_ld_i64(r, base, ofs);
2420 break;
2421 default:
2422 g_assert_not_reached();
2423 }
2424 return r;
2425}
2426
2427/* Load an unsigned element of ESZ from RM[LAST]. */
2428static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2429 int rm, int esz)
2430{
2431 TCGv_ptr p = tcg_temp_new_ptr();
2432 TCGv_i64 r;
2433
2434    /* Convert the offset within the vector into an offset within ENV.
2435     * The final adjustment for the vector register base
2436     * is added as a constant offset to the load.
2437     */
2438#ifdef HOST_WORDS_BIGENDIAN
2439 /* Adjust for element ordering. See vec_reg_offset. */
2440 if (esz < 3) {
2441 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2442 }
2443#endif
2444 tcg_gen_ext_i32_ptr(p, last);
2445 tcg_gen_add_ptr(p, p, cpu_env);
2446
2447 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2448 tcg_temp_free_ptr(p);
2449
2450 return r;
2451}
2452
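/* Big-endian fixup example: with esz = MO_8, byte index 3 in the
 * little-endian element order lives at host offset 3 ^ 7 = 4 within
 * its 64-bit unit, so XOR with 8 - (1 << esz) converts the index;
 * 64-bit elements need no adjustment.
 */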
2453/* Compute CLAST for a Zreg. */
2454static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2455{
2456 TCGv_i32 last;
2457 TCGLabel *over;
2458 TCGv_i64 ele;
2459 unsigned vsz, esz = a->esz;
2460
2461 if (!sve_access_check(s)) {
2462 return true;
2463 }
2464
2465 last = tcg_temp_local_new_i32();
2466 over = gen_new_label();
2467
2468 find_last_active(s, last, esz, a->pg);
2469
2470 /* There is of course no movcond for a 2048-bit vector,
2471 * so we must branch over the actual store.
2472 */
2473 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2474
2475 if (!before) {
2476 incr_last_active(s, last, esz);
2477 }
2478
2479 ele = load_last_active(s, last, a->rm, esz);
2480 tcg_temp_free_i32(last);
2481
2482 vsz = vec_full_reg_size(s);
2483 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2484 tcg_temp_free_i64(ele);
2485
2486 /* If this insn used MOVPRFX, we may need a second move. */
2487 if (a->rd != a->rn) {
2488 TCGLabel *done = gen_new_label();
2489 tcg_gen_br(done);
2490
2491 gen_set_label(over);
2492 do_mov_z(s, a->rd, a->rn);
2493
2494 gen_set_label(done);
2495 } else {
2496 gen_set_label(over);
2497 }
2498 return true;
2499}
2500
2501static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2502{
2503 return do_clast_vector(s, a, false);
2504}
2505
2506static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2507{
2508 return do_clast_vector(s, a, true);
2509}
2510
2511/* Compute CLAST for a scalar. */
2512static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2513 bool before, TCGv_i64 reg_val)
2514{
2515 TCGv_i32 last = tcg_temp_new_i32();
2516 TCGv_i64 ele, cmp, zero;
2517
2518 find_last_active(s, last, esz, pg);
2519
2520 /* Extend the original value of last prior to incrementing. */
2521 cmp = tcg_temp_new_i64();
2522 tcg_gen_ext_i32_i64(cmp, last);
2523
2524 if (!before) {
2525 incr_last_active(s, last, esz);
2526 }
2527
2528 /* The conceit here is that while last < 0 indicates not found, after
2529 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2530 * from which we can load garbage. We then discard the garbage with
2531 * a conditional move.
2532 */
2533 ele = load_last_active(s, last, rm, esz);
2534 tcg_temp_free_i32(last);
2535
2536 zero = tcg_const_i64(0);
2537 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2538
2539 tcg_temp_free_i64(zero);
2540 tcg_temp_free_i64(cmp);
2541 tcg_temp_free_i64(ele);
2542}
2543
2544/* Compute CLAST for a Vreg. */
2545static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2546{
2547 if (sve_access_check(s)) {
2548 int esz = a->esz;
2549 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2550 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2551
2552 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2553 write_fp_dreg(s, a->rd, reg);
2554 tcg_temp_free_i64(reg);
2555 }
2556 return true;
2557}
2558
2559static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2560{
2561 return do_clast_fp(s, a, false);
2562}
2563
2564static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2565{
2566 return do_clast_fp(s, a, true);
2567}
2568
2569/* Compute CLAST for a Xreg. */
2570static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2571{
2572 TCGv_i64 reg;
2573
2574 if (!sve_access_check(s)) {
2575 return true;
2576 }
2577
2578 reg = cpu_reg(s, a->rd);
2579 switch (a->esz) {
2580 case 0:
2581 tcg_gen_ext8u_i64(reg, reg);
2582 break;
2583 case 1:
2584 tcg_gen_ext16u_i64(reg, reg);
2585 break;
2586 case 2:
2587 tcg_gen_ext32u_i64(reg, reg);
2588 break;
2589 case 3:
2590 break;
2591 default:
2592 g_assert_not_reached();
2593 }
2594
2595 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2596 return true;
2597}
2598
2599static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2600{
2601 return do_clast_general(s, a, false);
2602}
2603
2604static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2605{
2606 return do_clast_general(s, a, true);
2607}
2608
2609/* Compute LAST for a scalar. */
2610static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2611 int pg, int rm, bool before)
2612{
2613 TCGv_i32 last = tcg_temp_new_i32();
2614 TCGv_i64 ret;
2615
2616 find_last_active(s, last, esz, pg);
2617 if (before) {
2618 wrap_last_active(s, last, esz);
2619 } else {
2620 incr_last_active(s, last, esz);
2621 }
2622
2623 ret = load_last_active(s, last, rm, esz);
2624 tcg_temp_free_i32(last);
2625 return ret;
2626}
2627
2628/* Compute LAST for a Vreg. */
2629static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2630{
2631 if (sve_access_check(s)) {
2632 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2633 write_fp_dreg(s, a->rd, val);
2634 tcg_temp_free_i64(val);
2635 }
2636 return true;
2637}
2638
2639static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640{
2641 return do_last_fp(s, a, false);
2642}
2643
2644static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645{
2646 return do_last_fp(s, a, true);
2647}
2648
2649/* Compute LAST for a Xreg. */
2650static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2651{
2652 if (sve_access_check(s)) {
2653 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2654 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2655 tcg_temp_free_i64(val);
2656 }
2657 return true;
2658}
2659
2660static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2661{
2662 return do_last_general(s, a, false);
2663}
2664
2665static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2666{
2667 return do_last_general(s, a, true);
2668}
2669
792a5578
RH
2670static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2671{
2672 if (sve_access_check(s)) {
2673 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2674 }
2675 return true;
2676}
2677
2678static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679{
2680 if (sve_access_check(s)) {
2681 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2682 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2683 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2684 tcg_temp_free_i64(t);
2685 }
2686 return true;
2687}
2688
dae8fb90
RH
2689static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690{
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 NULL,
2693 gen_helper_sve_revb_h,
2694 gen_helper_sve_revb_s,
2695 gen_helper_sve_revb_d,
2696 };
2697 return do_zpz_ool(s, a, fns[a->esz]);
2698}
2699
2700static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701{
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 NULL,
2704 NULL,
2705 gen_helper_sve_revh_s,
2706 gen_helper_sve_revh_d,
2707 };
2708 return do_zpz_ool(s, a, fns[a->esz]);
2709}
2710
2711static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2712{
2713 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2714}
2715
2716static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2717{
2718 static gen_helper_gvec_3 * const fns[4] = {
2719 gen_helper_sve_rbit_b,
2720 gen_helper_sve_rbit_h,
2721 gen_helper_sve_rbit_s,
2722 gen_helper_sve_rbit_d,
2723 };
2724 return do_zpz_ool(s, a, fns[a->esz]);
2725}
2726
b48ff240
RH
2727static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2728{
2729 if (sve_access_check(s)) {
2730 unsigned vsz = vec_full_reg_size(s);
2731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2732 vec_full_reg_offset(s, a->rn),
2733 vec_full_reg_offset(s, a->rm),
2734 pred_full_reg_offset(s, a->pg),
2735 vsz, vsz, a->esz, gen_helper_sve_splice);
2736 }
2737 return true;
2738}
2739
757f9cff
RH
2740/*
2741 *** SVE Integer Compare - Vectors Group
2742 */
2743
2744static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2745 gen_helper_gvec_flags_4 *gen_fn)
2746{
2747 TCGv_ptr pd, zn, zm, pg;
2748 unsigned vsz;
2749 TCGv_i32 t;
2750
2751 if (gen_fn == NULL) {
2752 return false;
2753 }
2754 if (!sve_access_check(s)) {
2755 return true;
2756 }
2757
2758 vsz = vec_full_reg_size(s);
2759 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2760 pd = tcg_temp_new_ptr();
2761 zn = tcg_temp_new_ptr();
2762 zm = tcg_temp_new_ptr();
2763 pg = tcg_temp_new_ptr();
2764
2765 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2766 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2767 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2768 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2769
2770 gen_fn(t, pd, zn, zm, pg, t);
2771
2772 tcg_temp_free_ptr(pd);
2773 tcg_temp_free_ptr(zn);
2774 tcg_temp_free_ptr(zm);
2775 tcg_temp_free_ptr(pg);
2776
2777 do_pred_flags(t);
2778
2779 tcg_temp_free_i32(t);
2780 return true;
2781}
2782
2783#define DO_PPZZ(NAME, name) \
2784static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2785 uint32_t insn) \
2786{ \
2787 static gen_helper_gvec_flags_4 * const fns[4] = { \
2788 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2789 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2790 }; \
2791 return do_ppzz_flags(s, a, fns[a->esz]); \
2792}
2793
2794DO_PPZZ(CMPEQ, cmpeq)
2795DO_PPZZ(CMPNE, cmpne)
2796DO_PPZZ(CMPGT, cmpgt)
2797DO_PPZZ(CMPGE, cmpge)
2798DO_PPZZ(CMPHI, cmphi)
2799DO_PPZZ(CMPHS, cmphs)
2800
2801#undef DO_PPZZ
2802
2803#define DO_PPZW(NAME, name) \
2804static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2805 uint32_t insn) \
2806{ \
2807 static gen_helper_gvec_flags_4 * const fns[4] = { \
2808 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2809 gen_helper_sve_##name##_ppzw_s, NULL \
2810 }; \
2811 return do_ppzz_flags(s, a, fns[a->esz]); \
2812}
2813
2814DO_PPZW(CMPEQ, cmpeq)
2815DO_PPZW(CMPNE, cmpne)
2816DO_PPZW(CMPGT, cmpgt)
2817DO_PPZW(CMPGE, cmpge)
2818DO_PPZW(CMPHI, cmphi)
2819DO_PPZW(CMPHS, cmphs)
2820DO_PPZW(CMPLT, cmplt)
2821DO_PPZW(CMPLE, cmple)
2822DO_PPZW(CMPLO, cmplo)
2823DO_PPZW(CMPLS, cmpls)
2824
2825#undef DO_PPZW
2826
38cadeba
RH
2827/*
2828 *** SVE Integer Compare - Immediate Groups
2829 */
2830
2831static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2832 gen_helper_gvec_flags_3 *gen_fn)
2833{
2834 TCGv_ptr pd, zn, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2837
2838 if (gen_fn == NULL) {
2839 return false;
2840 }
2841 if (!sve_access_check(s)) {
2842 return true;
2843 }
2844
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 pg = tcg_temp_new_ptr();
2850
2851 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2852 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2854
2855 gen_fn(t, pd, zn, pg, t);
2856
2857 tcg_temp_free_ptr(pd);
2858 tcg_temp_free_ptr(zn);
2859 tcg_temp_free_ptr(pg);
2860
2861 do_pred_flags(t);
2862
2863 tcg_temp_free_i32(t);
2864 return true;
2865}
2866
2867#define DO_PPZI(NAME, name) \
2868static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2869 uint32_t insn) \
2870{ \
2871 static gen_helper_gvec_flags_3 * const fns[4] = { \
2872 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2873 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2874 }; \
2875 return do_ppzi_flags(s, a, fns[a->esz]); \
2876}
2877
2878DO_PPZI(CMPEQ, cmpeq)
2879DO_PPZI(CMPNE, cmpne)
2880DO_PPZI(CMPGT, cmpgt)
2881DO_PPZI(CMPGE, cmpge)
2882DO_PPZI(CMPHI, cmphi)
2883DO_PPZI(CMPHS, cmphs)
2884DO_PPZI(CMPLT, cmplt)
2885DO_PPZI(CMPLE, cmple)
2886DO_PPZI(CMPLO, cmplo)
2887DO_PPZI(CMPLS, cmpls)
2888
2889#undef DO_PPZI
2890
35da316f
RH
2891/*
2892 *** SVE Partition Break Group
2893 */
2894
2895static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2896 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2897{
2898 if (!sve_access_check(s)) {
2899 return true;
2900 }
2901
2902 unsigned vsz = pred_full_reg_size(s);
2903
2904    /* Predicate sizes need not be a multiple of the 8-byte simd_desc unit. */
2905 TCGv_ptr d = tcg_temp_new_ptr();
2906 TCGv_ptr n = tcg_temp_new_ptr();
2907 TCGv_ptr m = tcg_temp_new_ptr();
2908 TCGv_ptr g = tcg_temp_new_ptr();
2909 TCGv_i32 t = tcg_const_i32(vsz - 2);
2910
2911 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2912 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2913 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2914 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2915
2916 if (a->s) {
2917 fn_s(t, d, n, m, g, t);
2918 do_pred_flags(t);
2919 } else {
2920 fn(d, n, m, g, t);
2921 }
2922 tcg_temp_free_ptr(d);
2923 tcg_temp_free_ptr(n);
2924 tcg_temp_free_ptr(m);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2927 return true;
2928}
2929
2930static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2931 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2932{
2933 if (!sve_access_check(s)) {
2934 return true;
2935 }
2936
2937 unsigned vsz = pred_full_reg_size(s);
2938
2939    /* Predicate sizes need not be a multiple of the 8-byte simd_desc unit. */
2940 TCGv_ptr d = tcg_temp_new_ptr();
2941 TCGv_ptr n = tcg_temp_new_ptr();
2942 TCGv_ptr g = tcg_temp_new_ptr();
2943 TCGv_i32 t = tcg_const_i32(vsz - 2);
2944
2945 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2946 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2947 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2948
2949 if (a->s) {
2950 fn_s(t, d, n, g, t);
2951 do_pred_flags(t);
2952 } else {
2953 fn(d, n, g, t);
2954 }
2955 tcg_temp_free_ptr(d);
2956 tcg_temp_free_ptr(n);
2957 tcg_temp_free_ptr(g);
2958 tcg_temp_free_i32(t);
2959 return true;
2960}
2961
2962static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2963{
2964 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2965}
2966
2967static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2968{
2969 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2970}
2971
2972static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2973{
2974 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2975}
2976
2977static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2978{
2979 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2980}
2981
2982static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2983{
2984 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2985}
2986
2987static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2988{
2989 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2990}
2991
2992static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2993{
2994 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2995}
2996
9ee3a611
RH
2997/*
2998 *** SVE Predicate Count Group
2999 */
3000
3001static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3002{
3003 unsigned psz = pred_full_reg_size(s);
3004
3005 if (psz <= 8) {
3006 uint64_t psz_mask;
3007
3008 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3009 if (pn != pg) {
3010 TCGv_i64 g = tcg_temp_new_i64();
3011 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3012 tcg_gen_and_i64(val, val, g);
3013 tcg_temp_free_i64(g);
3014 }
3015
3016    /* Trim the pred_esz_masks constant to the predicate size, purely
3017     * to shrink the constant and thus the code generated here.
3018     */
3019 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3020 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3021
3022 tcg_gen_ctpop_i64(val, val);
3023 } else {
3024 TCGv_ptr t_pn = tcg_temp_new_ptr();
3025 TCGv_ptr t_pg = tcg_temp_new_ptr();
3026 unsigned desc;
3027 TCGv_i32 t_desc;
3028
3029 desc = psz - 2;
3030 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3031
3032 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3033 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3034 t_desc = tcg_const_i32(desc);
3035
3036 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3037 tcg_temp_free_ptr(t_pn);
3038 tcg_temp_free_ptr(t_pg);
3039 tcg_temp_free_i32(t_desc);
3040 }
3041}
3042
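/* Fast-path example: each predicate bit governs one vector byte, so
 * pred_esz_masks[esz] keeps only the bit of each element's first byte,
 * e.g. 0x5555...55 for MO_16 and 0x0101...01 for MO_64.  After ANDing
 * with the guard predicate and that mask, a single ctpop gives the
 * element count.
 */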
3043static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3044{
3045 if (sve_access_check(s)) {
3046 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3047 }
3048 return true;
3049}
3050
3051static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3052 uint32_t insn)
3053{
3054 if (sve_access_check(s)) {
3055 TCGv_i64 reg = cpu_reg(s, a->rd);
3056 TCGv_i64 val = tcg_temp_new_i64();
3057
3058 do_cntp(s, val, a->esz, a->pg, a->pg);
3059 if (a->d) {
3060 tcg_gen_sub_i64(reg, reg, val);
3061 } else {
3062 tcg_gen_add_i64(reg, reg, val);
3063 }
3064 tcg_temp_free_i64(val);
3065 }
3066 return true;
3067}
3068
3069static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3070 uint32_t insn)
3071{
3072 if (a->esz == 0) {
3073 return false;
3074 }
3075 if (sve_access_check(s)) {
3076 unsigned vsz = vec_full_reg_size(s);
3077 TCGv_i64 val = tcg_temp_new_i64();
3078 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3079
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3082 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3083 }
3084 return true;
3085}
3086
3087static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3088 uint32_t insn)
3089{
3090 if (sve_access_check(s)) {
3091 TCGv_i64 reg = cpu_reg(s, a->rd);
3092 TCGv_i64 val = tcg_temp_new_i64();
3093
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_32(reg, val, a->u, a->d);
3096 }
3097 return true;
3098}
3099
3100static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3101 uint32_t insn)
3102{
3103 if (sve_access_check(s)) {
3104 TCGv_i64 reg = cpu_reg(s, a->rd);
3105 TCGv_i64 val = tcg_temp_new_i64();
3106
3107 do_cntp(s, val, a->esz, a->pg, a->pg);
3108 do_sat_addsub_64(reg, val, a->u, a->d);
3109 }
3110 return true;
3111}
3112
3113static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3114 uint32_t insn)
3115{
3116 if (a->esz == 0) {
3117 return false;
3118 }
3119 if (sve_access_check(s)) {
3120 TCGv_i64 val = tcg_temp_new_i64();
3121 do_cntp(s, val, a->esz, a->pg, a->pg);
3122 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3123 }
3124 return true;
3125}
3126
caf1cefc
RH
3127/*
3128 *** SVE Integer Compare Scalars Group
3129 */
3130
3131static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3132{
3133 if (!sve_access_check(s)) {
3134 return true;
3135 }
3136
3137 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140 TCGv_i64 cmp = tcg_temp_new_i64();
3141
3142 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144 tcg_temp_free_i64(cmp);
3145
3146 /* VF = !NF & !CF. */
3147 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3149
3150 /* Both NF and VF actually look at bit 31. */
3151 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3153 return true;
3154}
3155
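/* Resulting flags, reading the sequence above:
 *   test true:  N = 1, V = 0
 *   test false: N = 0, V = !C
 * where C is typically the carry left behind by a preceding WHILE;
 * the loop-termination branch then tests these flags.
 */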
3156static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3157{
3158 if (!sve_access_check(s)) {
3159 return true;
3160 }
3161
3162 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3163 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3164 TCGv_i64 t0 = tcg_temp_new_i64();
3165 TCGv_i64 t1 = tcg_temp_new_i64();
3166 TCGv_i32 t2, t3;
3167 TCGv_ptr ptr;
3168 unsigned desc, vsz = vec_full_reg_size(s);
3169 TCGCond cond;
3170
3171 if (!a->sf) {
3172 if (a->u) {
3173 tcg_gen_ext32u_i64(op0, op0);
3174 tcg_gen_ext32u_i64(op1, op1);
3175 } else {
3176 tcg_gen_ext32s_i64(op0, op0);
3177 tcg_gen_ext32s_i64(op1, op1);
3178 }
3179 }
3180
3181    /* For the helper, compress the different conditions into a computation
3182     * of the number of iterations for which the condition is true.
3183 *
3184 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3185 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3186 * aren't that large, so any value >= predicate size is sufficient.
3187 */
3188 tcg_gen_sub_i64(t0, op1, op0);
3189
3190 /* t0 = MIN(op1 - op0, vsz). */
3191 tcg_gen_movi_i64(t1, vsz);
3192 tcg_gen_umin_i64(t0, t0, t1);
3193 if (a->eq) {
3194 /* Equality means one more iteration. */
3195 tcg_gen_addi_i64(t0, t0, 1);
3196 }
3197
3198 /* t0 = (condition true ? t0 : 0). */
3199 cond = (a->u
3200 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3201 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3202 tcg_gen_movi_i64(t1, 0);
3203 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3204
3205 t2 = tcg_temp_new_i32();
3206 tcg_gen_extrl_i64_i32(t2, t0);
3207 tcg_temp_free_i64(t0);
3208 tcg_temp_free_i64(t1);
3209
3210 desc = (vsz / 8) - 2;
3211 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3212 t3 = tcg_const_i32(desc);
3213
3214 ptr = tcg_temp_new_ptr();
3215 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3216
3217 gen_helper_sve_while(t2, ptr, t2, t3);
3218 do_pred_flags(t2);
3219
3220 tcg_temp_free_ptr(ptr);
3221 tcg_temp_free_i32(t2);
3222 tcg_temp_free_i32(t3);
3223 return true;
3224}
3225
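/* Worked example (WHILELT, esz = MO_8, vsz = 16): op0 = 5, op1 = 8
 * gives t0 = min(8 - 5, 16) = 3; the signed less-than condition holds,
 * so the helper sets the first three predicate bits and do_pred_flags
 * reports a non-empty result.  With op0 >= op1, the movcond forces
 * t0 = 0 and an all-false predicate.
 */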
ed491961
RH
3226/*
3227 *** SVE Integer Wide Immediate - Unpredicated Group
3228 */
3229
3230static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3231{
3232 if (a->esz == 0) {
3233 return false;
3234 }
3235 if (sve_access_check(s)) {
3236 unsigned vsz = vec_full_reg_size(s);
3237 int dofs = vec_full_reg_offset(s, a->rd);
3238 uint64_t imm;
3239
3240 /* Decode the VFP immediate. */
3241 imm = vfp_expand_imm(a->esz, a->imm);
3242 imm = dup_const(a->esz, imm);
3243
3244 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3245 }
3246 return true;
3247}
3248
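/* Example: for esz = MO_16 and an imm8 encoding 1.0, vfp_expand_imm
 * returns 0x3c00, and dup_const(MO_16, 0x3c00) gives
 * 0x3c003c003c003c00, which the 64-bit dup replicates across the
 * whole vector.
 */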
3249static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3250{
3251 if (a->esz == 0 && extract32(insn, 13, 1)) {
3252 return false;
3253 }
3254 if (sve_access_check(s)) {
3255 unsigned vsz = vec_full_reg_size(s);
3256 int dofs = vec_full_reg_offset(s, a->rd);
3257
3258 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3259 }
3260 return true;
3261}
3262
6e6a157d
RH
3263static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3264{
3265 if (a->esz == 0 && extract32(insn, 13, 1)) {
3266 return false;
3267 }
3268 if (sve_access_check(s)) {
3269 unsigned vsz = vec_full_reg_size(s);
3270 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3271 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3272 }
3273 return true;
3274}
3275
3276static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3277{
3278 a->imm = -a->imm;
3279 return trans_ADD_zzi(s, a, insn);
3280}
3281
3282static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3283{
3284 static const GVecGen2s op[4] = {
3285 { .fni8 = tcg_gen_vec_sub8_i64,
3286 .fniv = tcg_gen_sub_vec,
3287 .fno = gen_helper_sve_subri_b,
3288 .opc = INDEX_op_sub_vec,
3289 .vece = MO_8,
3290 .scalar_first = true },
3291 { .fni8 = tcg_gen_vec_sub16_i64,
3292 .fniv = tcg_gen_sub_vec,
3293 .fno = gen_helper_sve_subri_h,
3294 .opc = INDEX_op_sub_vec,
3295 .vece = MO_16,
3296 .scalar_first = true },
3297 { .fni4 = tcg_gen_sub_i32,
3298 .fniv = tcg_gen_sub_vec,
3299 .fno = gen_helper_sve_subri_s,
3300 .opc = INDEX_op_sub_vec,
3301 .vece = MO_32,
3302 .scalar_first = true },
3303 { .fni8 = tcg_gen_sub_i64,
3304 .fniv = tcg_gen_sub_vec,
3305 .fno = gen_helper_sve_subri_d,
3306 .opc = INDEX_op_sub_vec,
3307 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3308 .vece = MO_64,
3309 .scalar_first = true }
3310 };
3311
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3313 return false;
3314 }
3315 if (sve_access_check(s)) {
3316 unsigned vsz = vec_full_reg_size(s);
3317 TCGv_i64 c = tcg_const_i64(a->imm);
3318 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3319 vec_full_reg_offset(s, a->rn),
3320 vsz, vsz, c, &op[a->esz]);
3321 tcg_temp_free_i64(c);
3322 }
3323 return true;
3324}
3325
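/* Note the reversed operands: SUBR computes imm - Zn, not Zn - imm,
 * which is what .scalar_first in the table above arranges.  E.g.
 * SUBR Zd.B, Zd.B, #0 negates each byte lane.
 */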
3326static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3327{
3328 if (sve_access_check(s)) {
3329 unsigned vsz = vec_full_reg_size(s);
3330 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3331 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3332 }
3333 return true;
3334}
3335
3336static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3337 bool u, bool d)
3338{
3339 if (a->esz == 0 && extract32(insn, 13, 1)) {
3340 return false;
3341 }
3342 if (sve_access_check(s)) {
3343 TCGv_i64 val = tcg_const_i64(a->imm);
3344 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3345 tcg_temp_free_i64(val);
3346 }
3347 return true;
3348}
3349
3350static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3351{
3352 return do_zzi_sat(s, a, insn, false, false);
3353}
3354
3355static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3356{
3357 return do_zzi_sat(s, a, insn, true, false);
3358}
3359
3360static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3361{
3362 return do_zzi_sat(s, a, insn, false, true);
3363}
3364
3365static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3366{
3367 return do_zzi_sat(s, a, insn, true, true);
3368}
3369
3370static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3371{
3372 if (sve_access_check(s)) {
3373 unsigned vsz = vec_full_reg_size(s);
3374 TCGv_i64 c = tcg_const_i64(a->imm);
3375
3376 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3377 vec_full_reg_offset(s, a->rn),
3378 c, vsz, vsz, 0, fn);
3379 tcg_temp_free_i64(c);
3380 }
3381 return true;
3382}
3383
3384#define DO_ZZI(NAME, name) \
3385static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3386 uint32_t insn) \
3387{ \
3388 static gen_helper_gvec_2i * const fns[4] = { \
3389 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3390 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3391 }; \
3392 return do_zzi_ool(s, a, fns[a->esz]); \
3393}
3394
3395DO_ZZI(SMAX, smax)
3396DO_ZZI(UMAX, umax)
3397DO_ZZI(SMIN, smin)
3398DO_ZZI(UMIN, umin)
3399
3400#undef DO_ZZI
3401
7f9ddf64
RH
3402/*
3403 *** SVE Floating Point Accumulating Reduction Group
3404 */
3405
3406static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3407{
3408 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3409 TCGv_ptr, TCGv_ptr, TCGv_i32);
3410 static fadda_fn * const fns[3] = {
3411 gen_helper_sve_fadda_h,
3412 gen_helper_sve_fadda_s,
3413 gen_helper_sve_fadda_d,
3414 };
3415 unsigned vsz = vec_full_reg_size(s);
3416 TCGv_ptr t_rm, t_pg, t_fpst;
3417 TCGv_i64 t_val;
3418 TCGv_i32 t_desc;
3419
3420 if (a->esz == 0) {
3421 return false;
3422 }
3423 if (!sve_access_check(s)) {
3424 return true;
3425 }
3426
3427 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3428 t_rm = tcg_temp_new_ptr();
3429 t_pg = tcg_temp_new_ptr();
3430 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3431 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3432 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3433 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3434
3435 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3436
3437 tcg_temp_free_i32(t_desc);
3438 tcg_temp_free_ptr(t_fpst);
3439 tcg_temp_free_ptr(t_pg);
3440 tcg_temp_free_ptr(t_rm);
3441
3442 write_fp_dreg(s, a->rd, t_val);
3443 tcg_temp_free_i64(t_val);
3444 return true;
3445}
3446
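/* FADDA is a strictly ordered accumulation (element 0 first), so
 * there is no gvec expansion; the scalar accumulator is threaded
 * through the helper and written back with write_fp_dreg.  fns[] is
 * indexed by a->esz - 1 because esz == 0 was rejected above.
 */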
29b80469
RH
3447/*
3448 *** SVE Floating Point Arithmetic - Unpredicated Group
3449 */
3450
3451static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3452 gen_helper_gvec_3_ptr *fn)
3453{
3454 if (fn == NULL) {
3455 return false;
3456 }
3457 if (sve_access_check(s)) {
3458 unsigned vsz = vec_full_reg_size(s);
3459 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3460 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3461 vec_full_reg_offset(s, a->rn),
3462 vec_full_reg_offset(s, a->rm),
3463 status, vsz, vsz, 0, fn);
3464 tcg_temp_free_ptr(status);
3465 }
3466 return true;
3467}
3468
3469
3470#define DO_FP3(NAME, name) \
3471static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3472{ \
3473 static gen_helper_gvec_3_ptr * const fns[4] = { \
3474 NULL, gen_helper_gvec_##name##_h, \
3475 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3476 }; \
3477 return do_zzz_fp(s, a, fns[a->esz]); \
3478}
3479
3480DO_FP3(FADD_zzz, fadd)
3481DO_FP3(FSUB_zzz, fsub)
3482DO_FP3(FMUL_zzz, fmul)
3483DO_FP3(FTSMUL, ftsmul)
3484DO_FP3(FRECPS, recps)
3485DO_FP3(FRSQRTS, rsqrts)
3486
3487#undef DO_FP3
3488
ec3b87c2
RH
3489/*
3490 *** SVE Floating Point Arithmetic - Predicated Group
3491 */
3492
3493static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3494 gen_helper_gvec_4_ptr *fn)
3495{
3496 if (fn == NULL) {
3497 return false;
3498 }
3499 if (sve_access_check(s)) {
3500 unsigned vsz = vec_full_reg_size(s);
3501 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3503 vec_full_reg_offset(s, a->rn),
3504 vec_full_reg_offset(s, a->rm),
3505 pred_full_reg_offset(s, a->pg),
3506 status, vsz, vsz, 0, fn);
3507 tcg_temp_free_ptr(status);
3508 }
3509 return true;
3510}
3511
3512#define DO_FP3(NAME, name) \
3513static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3514{ \
3515 static gen_helper_gvec_4_ptr * const fns[4] = { \
3516 NULL, gen_helper_sve_##name##_h, \
3517 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3518 }; \
3519 return do_zpzz_fp(s, a, fns[a->esz]); \
3520}
3521
3522DO_FP3(FADD_zpzz, fadd)
3523DO_FP3(FSUB_zpzz, fsub)
3524DO_FP3(FMUL_zpzz, fmul)
3525DO_FP3(FMIN_zpzz, fmin)
3526DO_FP3(FMAX_zpzz, fmax)
3527DO_FP3(FMINNM_zpzz, fminnum)
3528DO_FP3(FMAXNM_zpzz, fmaxnum)
3529DO_FP3(FABD, fabd)
3530DO_FP3(FSCALE, fscalbn)
3531DO_FP3(FDIV, fdiv)
3532DO_FP3(FMULX, fmulx)
3533
3534#undef DO_FP3
8092c6a3 3535
6ceabaad
RH
3536typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3537
3538static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3539{
3540 if (fn == NULL) {
3541 return false;
3542 }
3543 if (!sve_access_check(s)) {
3544 return true;
3545 }
3546
3547 unsigned vsz = vec_full_reg_size(s);
3548 unsigned desc;
3549 TCGv_i32 t_desc;
3550 TCGv_ptr pg = tcg_temp_new_ptr();
3551
3552 /* We would need 7 operands to pass these arguments "properly".
3553 * So we encode all the register numbers into the descriptor.
3554 */
3555 desc = deposit32(a->rd, 5, 5, a->rn);
3556 desc = deposit32(desc, 10, 5, a->rm);
3557 desc = deposit32(desc, 15, 5, a->ra);
3558 desc = simd_desc(vsz, vsz, desc);
3559
3560 t_desc = tcg_const_i32(desc);
3561 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3562 fn(cpu_env, pg, t_desc);
3563 tcg_temp_free_i32(t_desc);
3564 tcg_temp_free_ptr(pg);
3565 return true;
3566}
3567
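/* Sketch of the data field built above (before the simd_desc shift):
 *   bits [4:0]   = rd
 *   bits [9:5]   = rn
 *   bits [14:10] = rm
 *   bits [19:15] = ra
 * The helper rebuilds the four vector pointers from these fields
 * instead of taking seven arguments.
 */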
3568#define DO_FMLA(NAME, name) \
3569static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3570{ \
3571 static gen_helper_sve_fmla * const fns[4] = { \
3572 NULL, gen_helper_sve_##name##_h, \
3573 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3574 }; \
3575 return do_fmla(s, a, fns[a->esz]); \
3576}
3577
3578DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3579DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3580DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3581DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3582
3583#undef DO_FMLA
3584
8092c6a3
RH
3585/*
3586 *** SVE Floating Point Unary Operations Predicated Group
3587 */
3588
3589static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3590 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3591{
3592 if (sve_access_check(s)) {
3593 unsigned vsz = vec_full_reg_size(s);
3594 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3595 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3596 vec_full_reg_offset(s, rn),
3597 pred_full_reg_offset(s, pg),
3598 status, vsz, vsz, 0, fn);
3599 tcg_temp_free_ptr(status);
3600 }
3601 return true;
3602}
3603
3604static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3605{
3606 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3607}
3608
3609static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3610{
3611 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3612}
3613
3614static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3615{
3616 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3617}
3618
3619static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3620{
3621 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3622}
3623
3624static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3625{
3626 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3627}
3628
3629static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3630{
3631 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3632}
3633
3634static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3635{
3636 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3637}
3638
3639static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3640{
3641 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3642}
3643
3644static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3645{
3646 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3647}
3648
3649static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3650{
3651 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3652}
3653
3654static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3655{
3656 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3657}
3658
3659static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3660{
3661 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3662}
3663
3664static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3665{
3666 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3667}
3668
3669static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3670{
3671 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3672}
3673
d1822297
RH
3674/*
3675 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3676 */
3677
3678/* Subroutine loading a vector register at VOFS of LEN bytes.
3679 * The load should begin at the address Rn + IMM.
3680 */
3681
3682static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3683 int rn, int imm)
3684{
3685 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3686 uint32_t len_remain = len % 8;
3687 uint32_t nparts = len / 8 + ctpop8(len_remain);
3688 int midx = get_mem_index(s);
3689 TCGv_i64 addr, t0, t1;
3690
3691 addr = tcg_temp_new_i64();
3692 t0 = tcg_temp_new_i64();
3693
3694 /* Note that unpredicated load/store of vector/predicate registers
3695 * are defined as a stream of bytes, which equates to little-endian
3696 * operations on larger quantities. There is no nice way to force
3697 * a little-endian load for aarch64_be-linux-user out of line.
3698 *
3699 * Attempt to keep code expansion to a minimum by limiting the
3700 * amount of unrolling done.
3701 */
3702 if (nparts <= 4) {
3703 int i;
3704
3705 for (i = 0; i < len_align; i += 8) {
3706 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3707 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3708 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3709 }
3710 } else {
3711 TCGLabel *loop = gen_new_label();
3712 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3713
3714 gen_set_label(loop);
3715
3716 /* Minimize the number of local temps that must be re-read from
3717 * the stack each iteration. Instead, re-compute values other
3718 * than the loop counter.
3719 */
3720 tp = tcg_temp_new_ptr();
3721 tcg_gen_addi_ptr(tp, i, imm);
3722 tcg_gen_extu_ptr_i64(addr, tp);
3723 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3724
3725 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3726
3727 tcg_gen_add_ptr(tp, cpu_env, i);
3728 tcg_gen_addi_ptr(i, i, 8);
3729 tcg_gen_st_i64(t0, tp, vofs);
3730 tcg_temp_free_ptr(tp);
3731
3732 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3733 tcg_temp_free_ptr(i);
3734 }
3735
3736 /* Predicate register loads can be any multiple of 2.
3737 * Note that we still store the entire 64-bit unit into cpu_env.
3738 */
3739 if (len_remain) {
3740 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3741
3742 switch (len_remain) {
3743 case 2:
3744 case 4:
3745 case 8:
3746 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3747 break;
3748
3749 case 6:
3750 t1 = tcg_temp_new_i64();
3751 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3752 tcg_gen_addi_i64(addr, addr, 4);
3753 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3754 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3755 tcg_temp_free_i64(t1);
3756 break;
3757
3758 default:
3759 g_assert_not_reached();
3760 }
3761 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3762 }
3763 tcg_temp_free_i64(addr);
3764 tcg_temp_free_i64(t0);
3765}
3766
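/* Remainder example: a 14-byte predicate load has len_align = 8 and
 * len_remain = 6; no single little-endian load covers 6 bytes, so it
 * is split into a 4-byte load plus a 2-byte load deposited at bit 32
 * before the combined 64-bit unit is stored into cpu_env.
 */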
5047c204
RH
3767/* Similarly for stores. */
3768static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3769 int rn, int imm)
3770{
3771 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3772 uint32_t len_remain = len % 8;
3773 uint32_t nparts = len / 8 + ctpop8(len_remain);
3774 int midx = get_mem_index(s);
3775 TCGv_i64 addr, t0;
3776
3777 addr = tcg_temp_new_i64();
3778 t0 = tcg_temp_new_i64();
3779
3780 /* Note that unpredicated load/store of vector/predicate registers
3781 * are defined as a stream of bytes, which equates to little-endian
3782 * operations on larger quantities. There is no nice way to force
3783 * a little-endian store for aarch64_be-linux-user out of line.
3784 *
3785 * Attempt to keep code expansion to a minimum by limiting the
3786 * amount of unrolling done.
3787 */
3788 if (nparts <= 4) {
3789 int i;
3790
3791 for (i = 0; i < len_align; i += 8) {
3792 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3793 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3794 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3795 }
3796 } else {
3797 TCGLabel *loop = gen_new_label();
3798 TCGv_ptr t2, i = tcg_const_local_ptr(0);
3799
3800 gen_set_label(loop);
3801
3802 t2 = tcg_temp_new_ptr();
3803 tcg_gen_add_ptr(t2, cpu_env, i);
3804 tcg_gen_ld_i64(t0, t2, vofs);
3805
3806 /* Minimize the number of local temps that must be re-read from
3807 * the stack each iteration. Instead, re-compute values other
3808 * than the loop counter.
3809 */
3810 tcg_gen_addi_ptr(t2, i, imm);
3811 tcg_gen_extu_ptr_i64(addr, t2);
3812 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3813 tcg_temp_free_ptr(t2);
3814
3815 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3816
3817 tcg_gen_addi_ptr(i, i, 8);
3818
3819 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3820 tcg_temp_free_ptr(i);
3821 }
3822
3823 /* Predicate register stores can be any multiple of 2. */
3824 if (len_remain) {
3825 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3826 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3827
3828 switch (len_remain) {
3829 case 2:
3830 case 4:
3831 case 8:
3832 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3833 break;
3834
3835 case 6:
3836 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
3837 tcg_gen_addi_i64(addr, addr, 4);
3838 tcg_gen_shri_i64(t0, t0, 32);
3839 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
3840 break;
3841
3842 default:
3843 g_assert_not_reached();
3844 }
3845 }
3846 tcg_temp_free_i64(addr);
3847 tcg_temp_free_i64(t0);
3848}
3849
d1822297
RH
3850static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3851{
3852 if (sve_access_check(s)) {
3853 int size = vec_full_reg_size(s);
3854 int off = vec_full_reg_offset(s, a->rd);
3855 do_ldr(s, off, size, a->rn, a->imm * size);
3856 }
3857 return true;
3858}
3859
3860static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3861{
3862 if (sve_access_check(s)) {
3863 int size = pred_full_reg_size(s);
3864 int off = pred_full_reg_offset(s, a->rd);
3865 do_ldr(s, off, size, a->rn, a->imm * size);
3866 }
3867 return true;
3868}
c4e7c493 3869
5047c204
RH
3870static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3871{
3872 if (sve_access_check(s)) {
3873 int size = vec_full_reg_size(s);
3874 int off = vec_full_reg_offset(s, a->rd);
3875 do_str(s, off, size, a->rn, a->imm * size);
3876 }
3877 return true;
3878}
3879
3880static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3881{
3882 if (sve_access_check(s)) {
3883 int size = pred_full_reg_size(s);
3884 int off = pred_full_reg_offset(s, a->rd);
3885 do_str(s, off, size, a->rn, a->imm * size);
3886 }
3887 return true;
3888}
3889
c4e7c493
RH
3890/*
3891 *** SVE Memory - Contiguous Load Group
3892 */
3893
3894/* The memory mode of the dtype. */
3895static const TCGMemOp dtype_mop[16] = {
3896 MO_UB, MO_UB, MO_UB, MO_UB,
3897 MO_SL, MO_UW, MO_UW, MO_UW,
3898 MO_SW, MO_SW, MO_UL, MO_UL,
3899 MO_SB, MO_SB, MO_SB, MO_Q
3900};
3901
3902#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3903
3904/* The vector element size of dtype. */
3905static const uint8_t dtype_esz[16] = {
3906 0, 1, 2, 3,
3907 3, 1, 2, 3,
3908 3, 2, 2, 3,
3909 3, 2, 1, 3
3910};
3911
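/* Reading the tables above: dtype 3 (LD1B -> .D) has dtype_mop[3] =
 * MO_UB with dtype_esz[3] = 3, i.e. bytes zero-extended into 64-bit
 * elements; dtype 4 (LD1SW -> .D) has MO_SL with esz 3, i.e.
 * sign-extended 32-bit loads into 64-bit elements.
 */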
3912static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3913 gen_helper_gvec_mem *fn)
3914{
3915 unsigned vsz = vec_full_reg_size(s);
3916 TCGv_ptr t_pg;
3917 TCGv_i32 desc;
3918
3919    /* For LD4, for example, there are not enough helper arguments to
3920     * pass all 4 registers as pointers, so encode the regno into the
3921     * data field.  For consistency, do this even for LD1.
3922     */
3923 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3924 t_pg = tcg_temp_new_ptr();
3925
3926 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3927 fn(cpu_env, t_pg, addr, desc);
3928
3929 tcg_temp_free_ptr(t_pg);
3930 tcg_temp_free_i32(desc);
3931}
3932
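/* On the helper side, simd_data(desc) recovers the register number and
 * the helper derives the pointers to zt..zt+3 itself, so one signature
 * serves LD1 through LD4.
 */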
3933static void do_ld_zpa(DisasContext *s, int zt, int pg,
3934 TCGv_i64 addr, int dtype, int nreg)
3935{
3936 static gen_helper_gvec_mem * const fns[16][4] = {
3937 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3938 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3939 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3940 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3941 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3942
3943 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3944 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3945 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3946 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3947 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3948
3949 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3950 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3951 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3952 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3953 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3954
3955 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3956 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3957 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3958 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3959 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3960 };
3961 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3962
3963 /* While there are holes in the table, they are not
3964 * accessible via the instruction encoding.
3965 */
3966 assert(fn != NULL);
3967 do_mem_zpa(s, zt, pg, addr, fn);
3968}
3969
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* The register offset is scaled by the memory element size
         * only; the number of registers transferred does not enter
         * into the scaling.
         */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
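/* For the "MUL VL" immediate form below, #imm is a multiple of the
 * whole transfer size.  Worked example: LD3W with a 32-byte VL has
 * 8 word elements per register, so #imm = 2 adds
 * 2 * 8 * 3 * 4 = 192 bytes to the base.
 */
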
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}
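/* For first-fault loads (LDFF1) only the first active element may
 * fault; for no-fault loads (LDNF1) no element may fault.  Faults on
 * the remaining elements are suppressed and reported via FFR by the
 * helpers, so the translator only computes the base address.
 */
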
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldff1bb_r,
        gen_helper_sve_ldff1bhu_r,
        gen_helper_sve_ldff1bsu_r,
        gen_helper_sve_ldff1bdu_r,

        gen_helper_sve_ldff1sds_r,
        gen_helper_sve_ldff1hh_r,
        gen_helper_sve_ldff1hsu_r,
        gen_helper_sve_ldff1hdu_r,

        gen_helper_sve_ldff1hds_r,
        gen_helper_sve_ldff1hss_r,
        gen_helper_sve_ldff1ss_r,
        gen_helper_sve_ldff1sdu_r,

        gen_helper_sve_ldff1bds_r,
        gen_helper_sve_ldff1bss_r,
        gen_helper_sve_ldff1bhs_r,
        gen_helper_sve_ldff1dd_r,
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldnf1bb_r,
        gen_helper_sve_ldnf1bhu_r,
        gen_helper_sve_ldnf1bsu_r,
        gen_helper_sve_ldnf1bdu_r,

        gen_helper_sve_ldnf1sds_r,
        gen_helper_sve_ldnf1hh_r,
        gen_helper_sve_ldnf1hsu_r,
        gen_helper_sve_ldnf1hdu_r,

        gen_helper_sve_ldnf1hds_r,
        gen_helper_sve_ldnf1hss_r,
        gen_helper_sve_ldnf1ss_r,
        gen_helper_sve_ldnf1sdu_r,

        gen_helper_sve_ldnf1bds_r,
        gen_helper_sve_ldnf1bss_r,
        gen_helper_sve_ldnf1bhs_r,
        gen_helper_sve_ldnf1dd_r,
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}

static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[4] = {
        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* Load the first quadword using the normal predicated load helpers. */
    desc = tcg_const_i32(simd_desc(16, 16, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fns[msz](cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}

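/* Example: with a 48-byte VL, the quadword loaded into bytes 0..15 of
 * Zt is copied by tcg_gen_gvec_dup_mem into bytes 16..47, so every
 * 16-byte lane of the register holds the same data.
 */
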
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}

static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}

/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}

static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[4][4] = {
        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
        { NULL, gen_helper_sve_st1hh_r,
          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
        { NULL, NULL,
          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
    };
    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
    };
    gen_helper_gvec_mem *fn;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
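/* ST1 permits msz <= esz: e.g. fn_single[0][2] above is
 * gen_helper_sve_st1bs_r, which truncates each .S element to a byte.
 * The trans_* functions below reject the impossible msz > esz cases.
 */
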
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* As for LD_zprr, scale the register offset by the memory
         * element size only.
         */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}
/*
 *** SVE gather loads / scatter stores
 */

static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
                       TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

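/* The scale (0, or msz for scaled offsets) travels in the
 * descriptor's data field; each helper applies it while extending
 * the offset elements of Zm, e.g. addr = scalar + (off << scale)
 * per active element.
 */
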
/* Indexed by [ff][xs][u][msz]. */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
    { { { gen_helper_sve_ldbss_zsu,
          gen_helper_sve_ldhss_zsu,
          NULL, },
        { gen_helper_sve_ldbsu_zsu,
          gen_helper_sve_ldhsu_zsu,
          gen_helper_sve_ldssu_zsu, } },
      { { gen_helper_sve_ldbss_zss,
          gen_helper_sve_ldhss_zss,
          NULL, },
        { gen_helper_sve_ldbsu_zss,
          gen_helper_sve_ldhsu_zss,
          gen_helper_sve_ldssu_zss, } } },

    { { { gen_helper_sve_ldffbss_zsu,
          gen_helper_sve_ldffhss_zsu,
          NULL, },
        { gen_helper_sve_ldffbsu_zsu,
          gen_helper_sve_ldffhsu_zsu,
          gen_helper_sve_ldffssu_zsu, } },
      { { gen_helper_sve_ldffbss_zss,
          gen_helper_sve_ldffhss_zss,
          NULL, },
        { gen_helper_sve_ldffbsu_zss,
          gen_helper_sve_ldffhsu_zss,
          gen_helper_sve_ldffssu_zss, } } }
};

/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
    { { { gen_helper_sve_ldbds_zsu,
          gen_helper_sve_ldhds_zsu,
          gen_helper_sve_ldsds_zsu,
          NULL, },
        { gen_helper_sve_ldbdu_zsu,
          gen_helper_sve_ldhdu_zsu,
          gen_helper_sve_ldsdu_zsu,
          gen_helper_sve_ldddu_zsu, } },
      { { gen_helper_sve_ldbds_zss,
          gen_helper_sve_ldhds_zss,
          gen_helper_sve_ldsds_zss,
          NULL, },
        { gen_helper_sve_ldbdu_zss,
          gen_helper_sve_ldhdu_zss,
          gen_helper_sve_ldsdu_zss,
          gen_helper_sve_ldddu_zss, } },
      { { gen_helper_sve_ldbds_zd,
          gen_helper_sve_ldhds_zd,
          gen_helper_sve_ldsds_zd,
          NULL, },
        { gen_helper_sve_ldbdu_zd,
          gen_helper_sve_ldhdu_zd,
          gen_helper_sve_ldsdu_zd,
          gen_helper_sve_ldddu_zd, } } },

    { { { gen_helper_sve_ldffbds_zsu,
          gen_helper_sve_ldffhds_zsu,
          gen_helper_sve_ldffsds_zsu,
          NULL, },
        { gen_helper_sve_ldffbdu_zsu,
          gen_helper_sve_ldffhdu_zsu,
          gen_helper_sve_ldffsdu_zsu,
          gen_helper_sve_ldffddu_zsu, } },
      { { gen_helper_sve_ldffbds_zss,
          gen_helper_sve_ldffhds_zss,
          gen_helper_sve_ldffsds_zss,
          NULL, },
        { gen_helper_sve_ldffbdu_zss,
          gen_helper_sve_ldffhdu_zss,
          gen_helper_sve_ldffsdu_zss,
          gen_helper_sve_ldffddu_zss, } },
      { { gen_helper_sve_ldffbds_zd,
          gen_helper_sve_ldffhds_zd,
          gen_helper_sve_ldffsds_zd,
          NULL, },
        { gen_helper_sve_ldffbdu_zd,
          gen_helper_sve_ldffhdu_zd,
          gen_helper_sve_ldffsdu_zd,
          gen_helper_sve_ldffddu_zd, } } }
};
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn = NULL;

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    TCGv_i64 imm;

    /* A memory size equal to the element size requires an unsigned
     * load, since sign-extension to the same width is an identity;
     * the signed variants are not valid encodings.
     */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
    tcg_temp_free_i64(imm);
    return true;
}

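/* Scatter stores need neither a sign-extension (u) nor a first-fault
 * (ff) dimension, so these tables are smaller than the gather ones.
 */
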
/* Indexed by [xs][msz]. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
    { gen_helper_sve_stbs_zsu,
      gen_helper_sve_sths_zsu,
      gen_helper_sve_stss_zsu, },
    { gen_helper_sve_stbs_zss,
      gen_helper_sve_sths_zss,
      gen_helper_sve_stss_zss, },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
    { gen_helper_sve_stbd_zsu,
      gen_helper_sve_sthd_zsu,
      gen_helper_sve_stsd_zsu,
      gen_helper_sve_stdd_zsu, },
    { gen_helper_sve_stbd_zss,
      gen_helper_sve_sthd_zss,
      gen_helper_sve_stsd_zss,
      gen_helper_sve_stdd_zss, },
    { gen_helper_sve_stbd_zd,
      gen_helper_sve_sthd_zd,
      gen_helper_sve_stsd_zd,
      gen_helper_sve_stdd_zd, },
};
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn;

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), fn);
    return true;
}
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
    tcg_temp_free_i64(imm);
    return true;
}

/*
 * Prefetches
 */

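/* The access check is still performed so that traps for disabled SVE
 * are taken as usual; only the memory reference itself is omitted.
 */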
static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
{
    /* Prefetch is a nop within QEMU. */
    sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    sve_access_check(s);
    return true;
}