/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}
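
/* For example (illustrative): x is the 7-bit tsz:imm3 field.  Discarding
 * imm3 leaves tsz, whose most significant set bit selects the element
 * size: tsz = 0b0001 -> esz 0 (bytes), 0b001x -> 1 (halfwords),
 * 0b01xx -> 2, 0b1xxx -> 3; tsz = 0 yields -1 because clz32(0) == 32.
 */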

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
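
/* E.g. with x = 0x1ff (sh = 1, imm8 = 0xff): the signed form yields
 * (int8_t)0xff << 8 = -256, the unsigned form 0xff << 8 = 0xff00.
 */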

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
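
/* These values are the diagonal of the dtype table, i.e.
 * (msz << 2) | msz: memory element size equal to register element
 * size with zero extension -- LD1B, LD1H, LD1W, LD1D.
 */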

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
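
/* E.g. a 2048-bit vector length gives 256-byte Z registers and
 * 32-byte P registers: one predicate bit per vector byte.
 */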

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
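
/* E.g. a 6-byte predicate (384-bit VL) is padded to 8 bytes, while an
 * 18-byte predicate (1152-bit VL) rounds up to 32.
 */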

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
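
/* The helpers pack the PredTest result as N in bit 31, !Z in bit 1 and
 * C in bit 0, matching QEMU's ARM flag representation in which N/V are
 * bit 31 of cpu_NF/cpu_VF, Z is set iff cpu_ZF == 0, and C is bit 0 of
 * cpu_CF.
 */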

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
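
/* E.g. for halfword elements every other predicate bit is significant,
 * hence the 0x5555... mask; for doublewords only one bit in eight is,
 * hence 0x0101...
 */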

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
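
/* For instance, DO_ZPZZ(ADD, add) defines trans_ADD_zpzz, dispatching
 * on the element size to gen_helper_sve_add_zpzz_{b,h,s,d}.
 */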

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                  \
    static gen_helper_gvec_3 * const fns[4] = {                    \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,      \
    };                                                             \
    return do_zpz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                  \
    static gen_helper_gvec_reduc * const fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,      \
    };                                                             \
    return do_vpz_ool(s, a, fns[a->esz]);                          \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two-
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                  \
    static gen_helper_gvec_5 * const fns[4] = {                    \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,      \
    };                                                             \
    return do_zpzzz_ool(s, a, fns[a->esz]);                        \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
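
/* E.g. a 256-byte vector of doublewords holds 32 elements: POW2 gives
 * 32, VL16 gives 16, MUL3 gives 30, and VL64 gives 0 because the bound
 * exceeds the element count.
 */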

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
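
/* The register has been narrowed to 32 significant bits and val is a
 * small positive element count, so the exact sum or difference cannot
 * wrap in 64 bits; a single signed comparison against the relevant
 * bound (e.g. 0 for an unsigned decrement, UINT32_MAX for an unsigned
 * increment) is then enough to clamp the result.
 */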

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
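
/* The signed paths use the usual sign-bit trick: for a - b, overflow
 * occurred iff the operands differ in sign and the result differs in
 * sign from the minuend, i.e. ((a ^ b) & (a ^ (a - b))) < 0; for
 * a + b, iff the operands agree in sign and the result disagrees.
 * As val is known positive, only one saturation bound is needed in
 * each direction.
 */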

/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
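
/* E.g. with a 32-byte vector and imm = 16, the fast path copies Zn
 * bytes [16..31] to Zd bytes [0..15] and Zm bytes [0..15] to Zd bytes
 * [16..31] -- exactly the architectural EXT concatenation.
 */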
1999
30562ab7
RH
2000/*
2001 *** SVE Permute - Unpredicated Group
2002 */
2003
2004static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2005{
2006 if (sve_access_check(s)) {
2007 unsigned vsz = vec_full_reg_size(s);
2008 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2009 vsz, vsz, cpu_reg_sp(s, a->rn));
2010 }
2011 return true;
2012}
2013
2014static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2015{
2016 if ((a->imm & 0x1f) == 0) {
2017 return false;
2018 }
2019 if (sve_access_check(s)) {
2020 unsigned vsz = vec_full_reg_size(s);
2021 unsigned dofs = vec_full_reg_offset(s, a->rd);
2022 unsigned esz, index;
2023
2024 esz = ctz32(a->imm);
2025 index = a->imm >> (esz + 1);
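        /* An illustrative decode: imm == 0b0001100 gives esz == ctz32(12)
         * == 2 (word elements) and index == 12 >> 3 == 1, i.e. element 1
         * of Zn broadcast as 32-bit words.
         */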
2026
2027 if ((index << esz) < vsz) {
2028 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2029 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2030 } else {
2031 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2032 }
2033 }
2034 return true;
2035}
2036
2037static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2038{
2039 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2040 static gen_insr * const fns[4] = {
2041 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2042 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2043 };
2044 unsigned vsz = vec_full_reg_size(s);
2045 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2046 TCGv_ptr t_zd = tcg_temp_new_ptr();
2047 TCGv_ptr t_zn = tcg_temp_new_ptr();
2048
2049 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2050 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2051
2052 fns[a->esz](t_zd, t_zn, val, desc);
2053
2054 tcg_temp_free_ptr(t_zd);
2055 tcg_temp_free_ptr(t_zn);
2056 tcg_temp_free_i32(desc);
2057}
2058
2059static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2060{
2061 if (sve_access_check(s)) {
2062 TCGv_i64 t = tcg_temp_new_i64();
2063 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2064 do_insr_i64(s, a, t);
2065 tcg_temp_free_i64(t);
2066 }
2067 return true;
2068}
2069
2070static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2071{
2072 if (sve_access_check(s)) {
2073 do_insr_i64(s, a, cpu_reg(s, a->rm));
2074 }
2075 return true;
2076}
2077
2078static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2079{
2080 static gen_helper_gvec_2 * const fns[4] = {
2081 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2082 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2083 };
2084
2085 if (sve_access_check(s)) {
2086 unsigned vsz = vec_full_reg_size(s);
2087 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2088 vec_full_reg_offset(s, a->rn),
2089 vsz, vsz, 0, fns[a->esz]);
2090 }
2091 return true;
2092}
2093
2094static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2095{
2096 static gen_helper_gvec_3 * const fns[4] = {
2097 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2098 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2099 };
2100
2101 if (sve_access_check(s)) {
2102 unsigned vsz = vec_full_reg_size(s);
2103 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2104 vec_full_reg_offset(s, a->rn),
2105 vec_full_reg_offset(s, a->rm),
2106 vsz, vsz, 0, fns[a->esz]);
2107 }
2108 return true;
2109}
2110
2111static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2112{
2113 static gen_helper_gvec_2 * const fns[4][2] = {
2114 { NULL, NULL },
2115 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2116 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2117 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2118 };
2119
2120 if (a->esz == 0) {
2121 return false;
2122 }
2123 if (sve_access_check(s)) {
2124 unsigned vsz = vec_full_reg_size(s);
2125 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2126 vec_full_reg_offset(s, a->rn)
2127 + (a->h ? vsz / 2 : 0),
2128 vsz, vsz, 0, fns[a->esz][a->u]);
2129 }
2130 return true;
2131}
2132
2133/*
2134 *** SVE Permute - Predicates Group
2135 */
2136
2137static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2138 gen_helper_gvec_3 *fn)
2139{
2140 if (!sve_access_check(s)) {
2141 return true;
2142 }
2143
2144 unsigned vsz = pred_full_reg_size(s);
2145
2146 /* Predicate sizes may be smaller than simd_desc can encode.
2147 We cannot round up, as we do elsewhere, because we need
2148 the exact size for ZIP2 and REV. We retain the style for
2149 the other helpers for consistency. */
2150 TCGv_ptr t_d = tcg_temp_new_ptr();
2151 TCGv_ptr t_n = tcg_temp_new_ptr();
2152 TCGv_ptr t_m = tcg_temp_new_ptr();
2153 TCGv_i32 t_desc;
2154 int desc;
2155
2156 desc = vsz - 2;
2157 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2158 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
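    /* E.g. for a 256-bit VL the predicate is 4 bytes, so desc carries
     * 4 - 2 == 2 in its low bits, with esz and high_odd deposited at
     * SIMD_DATA_SHIFT (an illustrative decode of the layout above).
     */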
2159
2160 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2161 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2162 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2163 t_desc = tcg_const_i32(desc);
2164
2165 fn(t_d, t_n, t_m, t_desc);
2166
2167 tcg_temp_free_ptr(t_d);
2168 tcg_temp_free_ptr(t_n);
2169 tcg_temp_free_ptr(t_m);
2170 tcg_temp_free_i32(t_desc);
2171 return true;
2172}
2173
2174static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2175 gen_helper_gvec_2 *fn)
2176{
2177 if (!sve_access_check(s)) {
2178 return true;
2179 }
2180
2181 unsigned vsz = pred_full_reg_size(s);
2182 TCGv_ptr t_d = tcg_temp_new_ptr();
2183 TCGv_ptr t_n = tcg_temp_new_ptr();
2184 TCGv_i32 t_desc;
2185 int desc;
2186
2187 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2188 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2189
2190 /* Predicate sizes may be smaller than simd_desc can encode.
2191 We cannot round up, as we do elsewhere, because we need
2192 the exact size for ZIP2 and REV. We retain the style for
2193 the other helpers for consistency. */
2194
2195 desc = vsz - 2;
2196 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2197 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2198 t_desc = tcg_const_i32(desc);
2199
2200 fn(t_d, t_n, t_desc);
2201
2202 tcg_temp_free_i32(t_desc);
2203 tcg_temp_free_ptr(t_d);
2204 tcg_temp_free_ptr(t_n);
2205 return true;
2206}
2207
2208static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2209{
2210 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2211}
2212
2213static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2214{
2215 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2216}
2217
2218static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2219{
2220 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2221}
2222
2223static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2224{
2225 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2226}
2227
2228static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229{
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2231}
2232
2233static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234{
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2236}
2237
2238static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2239{
2240 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2241}
2242
2243static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2244{
2245 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2246}
2247
2248static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2249{
2250 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2251}
2252
2253/*
2254 *** SVE Permute - Interleaving Group
2255 */
2256
2257static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2258{
2259 static gen_helper_gvec_3 * const fns[4] = {
2260 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2261 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2262 };
2263
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
2266 unsigned high_ofs = high ? vsz / 2 : 0;
2267 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2268 vec_full_reg_offset(s, a->rn) + high_ofs,
2269 vec_full_reg_offset(s, a->rm) + high_ofs,
2270 vsz, vsz, 0, fns[a->esz]);
2271 }
2272 return true;
2273}
2274
2275static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2276 gen_helper_gvec_3 *fn)
2277{
2278 if (sve_access_check(s)) {
2279 unsigned vsz = vec_full_reg_size(s);
2280 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2281 vec_full_reg_offset(s, a->rn),
2282 vec_full_reg_offset(s, a->rm),
2283 vsz, vsz, data, fn);
2284 }
2285 return true;
2286}
2287
2288static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2289{
2290 return do_zip(s, a, false);
2291}
2292
2293static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2294{
2295 return do_zip(s, a, true);
2296}
2297
2298static gen_helper_gvec_3 * const uzp_fns[4] = {
2299 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2300 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2301};
2302
2303static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2304{
2305 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2306}
2307
2308static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309{
2310 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2311}
2312
2313static gen_helper_gvec_3 * const trn_fns[4] = {
2314 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2315 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2316};
2317
2318static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2319{
2320 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2321}
2322
2323static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324{
2325 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2326}
2327
2328/*
2329 *** SVE Permute Vector - Predicated Group
2330 */
2331
2332static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2333{
2334 static gen_helper_gvec_3 * const fns[4] = {
2335 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2336 };
2337 return do_zpz_ool(s, a, fns[a->esz]);
2338}
2339
2340/* Call the helper that computes the ARM LastActiveElement pseudocode
2341 * function, scaled by the element size. This includes the not found
2342 * indication; e.g. not found for esz=3 is -8.
2343 */
2344static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2345{
2346 /* Predicate sizes may be smaller than simd_desc can encode. We cannot
2347 * round up, as we do elsewhere, because we need the exact size.
2348 */
2349 TCGv_ptr t_p = tcg_temp_new_ptr();
2350 TCGv_i32 t_desc;
2351 unsigned vsz = pred_full_reg_size(s);
2352 unsigned desc;
2353
2354 desc = vsz - 2;
2355 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2356
2357 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2358 t_desc = tcg_const_i32(desc);
2359
2360 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2361
2362 tcg_temp_free_i32(t_desc);
2363 tcg_temp_free_ptr(t_p);
2364}
2365
2366/* Increment LAST to the offset of the next element in the vector,
2367 * wrapping around to 0.
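 * E.g. with vsz == 48 (a legal non-power-of-2 VL) and last == 44 at
 * esz == MO_32, the increment reaches 48 == vsz and the movcond below
 * wraps it back to 0.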
2368 */
2369static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2370{
2371 unsigned vsz = vec_full_reg_size(s);
2372
2373 tcg_gen_addi_i32(last, last, 1 << esz);
2374 if (is_power_of_2(vsz)) {
2375 tcg_gen_andi_i32(last, last, vsz - 1);
2376 } else {
2377 TCGv_i32 max = tcg_const_i32(vsz);
2378 TCGv_i32 zero = tcg_const_i32(0);
2379 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2380 tcg_temp_free_i32(max);
2381 tcg_temp_free_i32(zero);
2382 }
2383}
2384
2385/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2386static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2387{
2388 unsigned vsz = vec_full_reg_size(s);
2389
2390 if (is_power_of_2(vsz)) {
2391 tcg_gen_andi_i32(last, last, vsz - 1);
2392 } else {
2393 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2394 TCGv_i32 zero = tcg_const_i32(0);
2395 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2396 tcg_temp_free_i32(max);
2397 tcg_temp_free_i32(zero);
2398 }
2399}
2400
2401/* Load an unsigned element of ESZ from BASE+OFS. */
2402static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2403{
2404 TCGv_i64 r = tcg_temp_new_i64();
2405
2406 switch (esz) {
2407 case 0:
2408 tcg_gen_ld8u_i64(r, base, ofs);
2409 break;
2410 case 1:
2411 tcg_gen_ld16u_i64(r, base, ofs);
2412 break;
2413 case 2:
2414 tcg_gen_ld32u_i64(r, base, ofs);
2415 break;
2416 case 3:
2417 tcg_gen_ld_i64(r, base, ofs);
2418 break;
2419 default:
2420 g_assert_not_reached();
2421 }
2422 return r;
2423}
2424
2425/* Load an unsigned element of ESZ from RM[LAST]. */
2426static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2427 int rm, int esz)
2428{
2429 TCGv_ptr p = tcg_temp_new_ptr();
2430 TCGv_i64 r;
2431
2432 /* Convert the offset within the vector into an offset within ENV.
2433 * The final adjustment for the vector register base
2434 * is added as a constant offset to the load below.
2435 */
2436#ifdef HOST_WORDS_BIGENDIAN
2437 /* Adjust for element ordering. See vec_reg_offset. */
2438 if (esz < 3) {
2439 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2440 }
2441#endif
2442 tcg_gen_ext_i32_ptr(p, last);
2443 tcg_gen_add_ptr(p, p, cpu_env);
2444
2445 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2446 tcg_temp_free_ptr(p);
2447
2448 return r;
2449}
2450
2451/* Compute CLAST for a Zreg. */
2452static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2453{
2454 TCGv_i32 last;
2455 TCGLabel *over;
2456 TCGv_i64 ele;
2457 unsigned vsz, esz = a->esz;
2458
2459 if (!sve_access_check(s)) {
2460 return true;
2461 }
2462
2463 last = tcg_temp_local_new_i32();
2464 over = gen_new_label();
2465
2466 find_last_active(s, last, esz, a->pg);
2467
2468 /* There is of course no movcond for a 2048-bit vector,
2469 * so we must branch over the actual store.
2470 */
2471 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2472
2473 if (!before) {
2474 incr_last_active(s, last, esz);
2475 }
2476
2477 ele = load_last_active(s, last, a->rm, esz);
2478 tcg_temp_free_i32(last);
2479
2480 vsz = vec_full_reg_size(s);
2481 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2482 tcg_temp_free_i64(ele);
2483
2484 /* If this insn used MOVPRFX, we may need a second move. */
2485 if (a->rd != a->rn) {
2486 TCGLabel *done = gen_new_label();
2487 tcg_gen_br(done);
2488
2489 gen_set_label(over);
2490 do_mov_z(s, a->rd, a->rn);
2491
2492 gen_set_label(done);
2493 } else {
2494 gen_set_label(over);
2495 }
2496 return true;
2497}
2498
2499static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2500{
2501 return do_clast_vector(s, a, false);
2502}
2503
2504static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2505{
2506 return do_clast_vector(s, a, true);
2507}
2508
2509/* Compute CLAST for a scalar. */
2510static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2511 bool before, TCGv_i64 reg_val)
2512{
2513 TCGv_i32 last = tcg_temp_new_i32();
2514 TCGv_i64 ele, cmp, zero;
2515
2516 find_last_active(s, last, esz, pg);
2517
2518 /* Extend the original value of last prior to incrementing. */
2519 cmp = tcg_temp_new_i64();
2520 tcg_gen_ext_i32_i64(cmp, last);
2521
2522 if (!before) {
2523 incr_last_active(s, last, esz);
2524 }
2525
2526 /* The conceit here is that while last < 0 indicates not found, after
2527 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2528 * from which we can load garbage. We then discard the garbage with
2529 * a conditional move.
2530 */
2531 ele = load_last_active(s, last, rm, esz);
2532 tcg_temp_free_i32(last);
2533
2534 zero = tcg_const_i64(0);
2535 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2536
2537 tcg_temp_free_i64(zero);
2538 tcg_temp_free_i64(cmp);
2539 tcg_temp_free_i64(ele);
2540}
2541
2542/* Compute CLAST for a Vreg. */
2543static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2544{
2545 if (sve_access_check(s)) {
2546 int esz = a->esz;
2547 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2548 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2549
2550 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2551 write_fp_dreg(s, a->rd, reg);
2552 tcg_temp_free_i64(reg);
2553 }
2554 return true;
2555}
2556
2557static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2558{
2559 return do_clast_fp(s, a, false);
2560}
2561
2562static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2563{
2564 return do_clast_fp(s, a, true);
2565}
2566
2567/* Compute CLAST for a Xreg. */
2568static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2569{
2570 TCGv_i64 reg;
2571
2572 if (!sve_access_check(s)) {
2573 return true;
2574 }
2575
2576 reg = cpu_reg(s, a->rd);
2577 switch (a->esz) {
2578 case 0:
2579 tcg_gen_ext8u_i64(reg, reg);
2580 break;
2581 case 1:
2582 tcg_gen_ext16u_i64(reg, reg);
2583 break;
2584 case 2:
2585 tcg_gen_ext32u_i64(reg, reg);
2586 break;
2587 case 3:
2588 break;
2589 default:
2590 g_assert_not_reached();
2591 }
2592
2593 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2594 return true;
2595}
2596
2597static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2598{
2599 return do_clast_general(s, a, false);
2600}
2601
2602static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2603{
2604 return do_clast_general(s, a, true);
2605}
2606
2607/* Compute LAST for a scalar. */
2608static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2609 int pg, int rm, bool before)
2610{
2611 TCGv_i32 last = tcg_temp_new_i32();
2612 TCGv_i64 ret;
2613
2614 find_last_active(s, last, esz, pg);
2615 if (before) {
2616 wrap_last_active(s, last, esz);
2617 } else {
2618 incr_last_active(s, last, esz);
2619 }
2620
2621 ret = load_last_active(s, last, rm, esz);
2622 tcg_temp_free_i32(last);
2623 return ret;
2624}
2625
2626/* Compute LAST for a Vreg. */
2627static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2628{
2629 if (sve_access_check(s)) {
2630 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2631 write_fp_dreg(s, a->rd, val);
2632 tcg_temp_free_i64(val);
2633 }
2634 return true;
2635}
2636
2637static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2638{
2639 return do_last_fp(s, a, false);
2640}
2641
2642static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2643{
2644 return do_last_fp(s, a, true);
2645}
2646
2647/* Compute LAST for a Xreg. */
2648static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2649{
2650 if (sve_access_check(s)) {
2651 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2652 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2653 tcg_temp_free_i64(val);
2654 }
2655 return true;
2656}
2657
2658static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2659{
2660 return do_last_general(s, a, false);
2661}
2662
2663static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664{
2665 return do_last_general(s, a, true);
2666}
2667
2668static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669{
2670 if (sve_access_check(s)) {
2671 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2672 }
2673 return true;
2674}
2675
2676static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2677{
2678 if (sve_access_check(s)) {
2679 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2680 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2681 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2682 tcg_temp_free_i64(t);
2683 }
2684 return true;
2685}
2686
2687static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2688{
2689 static gen_helper_gvec_3 * const fns[4] = {
2690 NULL,
2691 gen_helper_sve_revb_h,
2692 gen_helper_sve_revb_s,
2693 gen_helper_sve_revb_d,
2694 };
2695 return do_zpz_ool(s, a, fns[a->esz]);
2696}
2697
2698static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2699{
2700 static gen_helper_gvec_3 * const fns[4] = {
2701 NULL,
2702 NULL,
2703 gen_helper_sve_revh_s,
2704 gen_helper_sve_revh_d,
2705 };
2706 return do_zpz_ool(s, a, fns[a->esz]);
2707}
2708
2709static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2710{
2711 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2712}
2713
2714static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2715{
2716 static gen_helper_gvec_3 * const fns[4] = {
2717 gen_helper_sve_rbit_b,
2718 gen_helper_sve_rbit_h,
2719 gen_helper_sve_rbit_s,
2720 gen_helper_sve_rbit_d,
2721 };
2722 return do_zpz_ool(s, a, fns[a->esz]);
2723}
2724
2725static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2726{
2727 if (sve_access_check(s)) {
2728 unsigned vsz = vec_full_reg_size(s);
2729 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2730 vec_full_reg_offset(s, a->rn),
2731 vec_full_reg_offset(s, a->rm),
2732 pred_full_reg_offset(s, a->pg),
2733 vsz, vsz, a->esz, gen_helper_sve_splice);
2734 }
2735 return true;
2736}
2737
2738/*
2739 *** SVE Integer Compare - Vectors Group
2740 */
2741
2742static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2743 gen_helper_gvec_flags_4 *gen_fn)
2744{
2745 TCGv_ptr pd, zn, zm, pg;
2746 unsigned vsz;
2747 TCGv_i32 t;
2748
2749 if (gen_fn == NULL) {
2750 return false;
2751 }
2752 if (!sve_access_check(s)) {
2753 return true;
2754 }
2755
2756 vsz = vec_full_reg_size(s);
2757 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2758 pd = tcg_temp_new_ptr();
2759 zn = tcg_temp_new_ptr();
2760 zm = tcg_temp_new_ptr();
2761 pg = tcg_temp_new_ptr();
2762
2763 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2764 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2765 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2766 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2767
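    /* Note that T does double duty here: it carries the simd_desc into
     * the helper and receives the predicate flags result back out.
     */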
2768 gen_fn(t, pd, zn, zm, pg, t);
2769
2770 tcg_temp_free_ptr(pd);
2771 tcg_temp_free_ptr(zn);
2772 tcg_temp_free_ptr(zm);
2773 tcg_temp_free_ptr(pg);
2774
2775 do_pred_flags(t);
2776
2777 tcg_temp_free_i32(t);
2778 return true;
2779}
2780
2781#define DO_PPZZ(NAME, name) \
2782static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2783 uint32_t insn) \
2784{ \
2785 static gen_helper_gvec_flags_4 * const fns[4] = { \
2786 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2787 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2788 }; \
2789 return do_ppzz_flags(s, a, fns[a->esz]); \
2790}
2791
2792DO_PPZZ(CMPEQ, cmpeq)
2793DO_PPZZ(CMPNE, cmpne)
2794DO_PPZZ(CMPGT, cmpgt)
2795DO_PPZZ(CMPGE, cmpge)
2796DO_PPZZ(CMPHI, cmphi)
2797DO_PPZZ(CMPHS, cmphs)
2798
2799#undef DO_PPZZ
2800
2801#define DO_PPZW(NAME, name) \
2802static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2803 uint32_t insn) \
2804{ \
2805 static gen_helper_gvec_flags_4 * const fns[4] = { \
2806 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2807 gen_helper_sve_##name##_ppzw_s, NULL \
2808 }; \
2809 return do_ppzz_flags(s, a, fns[a->esz]); \
2810}
2811
2812DO_PPZW(CMPEQ, cmpeq)
2813DO_PPZW(CMPNE, cmpne)
2814DO_PPZW(CMPGT, cmpgt)
2815DO_PPZW(CMPGE, cmpge)
2816DO_PPZW(CMPHI, cmphi)
2817DO_PPZW(CMPHS, cmphs)
2818DO_PPZW(CMPLT, cmplt)
2819DO_PPZW(CMPLE, cmple)
2820DO_PPZW(CMPLO, cmplo)
2821DO_PPZW(CMPLS, cmpls)
2822
2823#undef DO_PPZW
2824
2825/*
2826 *** SVE Integer Compare - Immediate Groups
2827 */
2828
2829static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2830 gen_helper_gvec_flags_3 *gen_fn)
2831{
2832 TCGv_ptr pd, zn, pg;
2833 unsigned vsz;
2834 TCGv_i32 t;
2835
2836 if (gen_fn == NULL) {
2837 return false;
2838 }
2839 if (!sve_access_check(s)) {
2840 return true;
2841 }
2842
2843 vsz = vec_full_reg_size(s);
2844 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2845 pd = tcg_temp_new_ptr();
2846 zn = tcg_temp_new_ptr();
2847 pg = tcg_temp_new_ptr();
2848
2849 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2850 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2851 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2852
2853 gen_fn(t, pd, zn, pg, t);
2854
2855 tcg_temp_free_ptr(pd);
2856 tcg_temp_free_ptr(zn);
2857 tcg_temp_free_ptr(pg);
2858
2859 do_pred_flags(t);
2860
2861 tcg_temp_free_i32(t);
2862 return true;
2863}
2864
2865#define DO_PPZI(NAME, name) \
2866static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2867 uint32_t insn) \
2868{ \
2869 static gen_helper_gvec_flags_3 * const fns[4] = { \
2870 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2871 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2872 }; \
2873 return do_ppzi_flags(s, a, fns[a->esz]); \
2874}
2875
2876DO_PPZI(CMPEQ, cmpeq)
2877DO_PPZI(CMPNE, cmpne)
2878DO_PPZI(CMPGT, cmpgt)
2879DO_PPZI(CMPGE, cmpge)
2880DO_PPZI(CMPHI, cmphi)
2881DO_PPZI(CMPHS, cmphs)
2882DO_PPZI(CMPLT, cmplt)
2883DO_PPZI(CMPLE, cmple)
2884DO_PPZI(CMPLO, cmplo)
2885DO_PPZI(CMPLS, cmpls)
2886
2887#undef DO_PPZI
2888
2889/*
2890 *** SVE Partition Break Group
2891 */
2892
2893static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2894 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2895{
2896 if (!sve_access_check(s)) {
2897 return true;
2898 }
2899
2900 unsigned vsz = pred_full_reg_size(s);
2901
2902 /* Predicate sizes may be smaller than simd_desc can encode. */
2903 TCGv_ptr d = tcg_temp_new_ptr();
2904 TCGv_ptr n = tcg_temp_new_ptr();
2905 TCGv_ptr m = tcg_temp_new_ptr();
2906 TCGv_ptr g = tcg_temp_new_ptr();
2907 TCGv_i32 t = tcg_const_i32(vsz - 2);
2908
2909 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2910 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2911 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2912 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2913
2914 if (a->s) {
2915 fn_s(t, d, n, m, g, t);
2916 do_pred_flags(t);
2917 } else {
2918 fn(d, n, m, g, t);
2919 }
2920 tcg_temp_free_ptr(d);
2921 tcg_temp_free_ptr(n);
2922 tcg_temp_free_ptr(m);
2923 tcg_temp_free_ptr(g);
2924 tcg_temp_free_i32(t);
2925 return true;
2926}
2927
2928static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2929 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2930{
2931 if (!sve_access_check(s)) {
2932 return true;
2933 }
2934
2935 unsigned vsz = pred_full_reg_size(s);
2936
2937 /* Predicate sizes may be smaller than simd_desc can encode. */
2938 TCGv_ptr d = tcg_temp_new_ptr();
2939 TCGv_ptr n = tcg_temp_new_ptr();
2940 TCGv_ptr g = tcg_temp_new_ptr();
2941 TCGv_i32 t = tcg_const_i32(vsz - 2);
2942
2943 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2944 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2945 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2946
2947 if (a->s) {
2948 fn_s(t, d, n, g, t);
2949 do_pred_flags(t);
2950 } else {
2951 fn(d, n, g, t);
2952 }
2953 tcg_temp_free_ptr(d);
2954 tcg_temp_free_ptr(n);
2955 tcg_temp_free_ptr(g);
2956 tcg_temp_free_i32(t);
2957 return true;
2958}
2959
2960static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2961{
2962 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2963}
2964
2965static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2966{
2967 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2968}
2969
2970static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2971{
2972 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2973}
2974
2975static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2976{
2977 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2978}
2979
2980static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2981{
2982 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2983}
2984
2985static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2986{
2987 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2988}
2989
2990static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2991{
2992 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2993}
2994
2995/*
2996 *** SVE Predicate Count Group
2997 */
2998
2999static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3000{
3001 unsigned psz = pred_full_reg_size(s);
3002
3003 if (psz <= 8) {
3004 uint64_t psz_mask;
3005
3006 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3007 if (pn != pg) {
3008 TCGv_i64 g = tcg_temp_new_i64();
3009 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3010 tcg_gen_and_i64(val, val, g);
3011 tcg_temp_free_i64(g);
3012 }
3013
3014 /* Reduce the pred_esz_masks value simply to reduce the
3015 * size of the code generated here.
3016 */
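        /* E.g. at esz == MO_64 only every eighth predicate bit is
         * significant, so (assuming the usual pred_esz_masks values)
         * the AND below leaves one bit per active doubleword element
         * for the ctpop to count.
         */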
3017 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3018 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3019
3020 tcg_gen_ctpop_i64(val, val);
3021 } else {
3022 TCGv_ptr t_pn = tcg_temp_new_ptr();
3023 TCGv_ptr t_pg = tcg_temp_new_ptr();
3024 unsigned desc;
3025 TCGv_i32 t_desc;
3026
3027 desc = psz - 2;
3028 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3029
3030 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3031 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3032 t_desc = tcg_const_i32(desc);
3033
3034 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3035 tcg_temp_free_ptr(t_pn);
3036 tcg_temp_free_ptr(t_pg);
3037 tcg_temp_free_i32(t_desc);
3038 }
3039}
3040
3041static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3042{
3043 if (sve_access_check(s)) {
3044 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3045 }
3046 return true;
3047}
3048
3049static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3050 uint32_t insn)
3051{
3052 if (sve_access_check(s)) {
3053 TCGv_i64 reg = cpu_reg(s, a->rd);
3054 TCGv_i64 val = tcg_temp_new_i64();
3055
3056 do_cntp(s, val, a->esz, a->pg, a->pg);
3057 if (a->d) {
3058 tcg_gen_sub_i64(reg, reg, val);
3059 } else {
3060 tcg_gen_add_i64(reg, reg, val);
3061 }
3062 tcg_temp_free_i64(val);
3063 }
3064 return true;
3065}
3066
3067static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3068 uint32_t insn)
3069{
3070 if (a->esz == 0) {
3071 return false;
3072 }
3073 if (sve_access_check(s)) {
3074 unsigned vsz = vec_full_reg_size(s);
3075 TCGv_i64 val = tcg_temp_new_i64();
3076 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3077
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3080 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3081 }
3082 return true;
3083}
3084
3085static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3086 uint32_t insn)
3087{
3088 if (sve_access_check(s)) {
3089 TCGv_i64 reg = cpu_reg(s, a->rd);
3090 TCGv_i64 val = tcg_temp_new_i64();
3091
3092 do_cntp(s, val, a->esz, a->pg, a->pg);
3093 do_sat_addsub_32(reg, val, a->u, a->d);
3094 }
3095 return true;
3096}
3097
3098static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3099 uint32_t insn)
3100{
3101 if (sve_access_check(s)) {
3102 TCGv_i64 reg = cpu_reg(s, a->rd);
3103 TCGv_i64 val = tcg_temp_new_i64();
3104
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_64(reg, val, a->u, a->d);
3107 }
3108 return true;
3109}
3110
3111static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3112 uint32_t insn)
3113{
3114 if (a->esz == 0) {
3115 return false;
3116 }
3117 if (sve_access_check(s)) {
3118 TCGv_i64 val = tcg_temp_new_i64();
3119 do_cntp(s, val, a->esz, a->pg, a->pg);
3120 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3121 }
3122 return true;
3123}
3124
3125/*
3126 *** SVE Integer Compare Scalars Group
3127 */
3128
3129static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3130{
3131 if (!sve_access_check(s)) {
3132 return true;
3133 }
3134
3135 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3136 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3137 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3138 TCGv_i64 cmp = tcg_temp_new_i64();
3139
3140 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3141 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3142 tcg_temp_free_i64(cmp);
3143
3144 /* VF = !NF & !CF. */
3145 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3146 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3147
3148 /* Both NF and VF actually look at bit 31. */
3149 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3150 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3151 return true;
3152}
3153
3154static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3155{
3156 if (!sve_access_check(s)) {
3157 return true;
3158 }
3159
3160 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3161 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3162 TCGv_i64 t0 = tcg_temp_new_i64();
3163 TCGv_i64 t1 = tcg_temp_new_i64();
3164 TCGv_i32 t2, t3;
3165 TCGv_ptr ptr;
3166 unsigned desc, vsz = vec_full_reg_size(s);
3167 TCGCond cond;
3168
3169 if (!a->sf) {
3170 if (a->u) {
3171 tcg_gen_ext32u_i64(op0, op0);
3172 tcg_gen_ext32u_i64(op1, op1);
3173 } else {
3174 tcg_gen_ext32s_i64(op0, op0);
3175 tcg_gen_ext32s_i64(op1, op1);
3176 }
3177 }
3178
3179 /* For the helper, compress the different conditions into a computation
3180 * of how many iterations for which the condition is true.
3181 *
3182 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3183 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3184 * aren't that large, so any value >= predicate size is sufficient.
3185 */
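    /* A worked example (illustrative): WHILELT with op0 == 5, op1 == 8,
     * signed and !eq, gives t0 == min(3, vsz) == 3; since 5 < 8 the
     * movcond below keeps t0, and the helper sets the first three
     * elements of Pd.
     */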
3186 tcg_gen_sub_i64(t0, op1, op0);
3187
3188 /* t0 = MIN(op1 - op0, vsz). */
3189 tcg_gen_movi_i64(t1, vsz);
3190 tcg_gen_umin_i64(t0, t0, t1);
3191 if (a->eq) {
3192 /* Equality means one more iteration. */
3193 tcg_gen_addi_i64(t0, t0, 1);
3194 }
3195
3196 /* t0 = (condition true ? t0 : 0). */
3197 cond = (a->u
3198 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3199 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3200 tcg_gen_movi_i64(t1, 0);
3201 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3202
3203 t2 = tcg_temp_new_i32();
3204 tcg_gen_extrl_i64_i32(t2, t0);
3205 tcg_temp_free_i64(t0);
3206 tcg_temp_free_i64(t1);
3207
3208 desc = (vsz / 8) - 2;
3209 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3210 t3 = tcg_const_i32(desc);
3211
3212 ptr = tcg_temp_new_ptr();
3213 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3214
3215 gen_helper_sve_while(t2, ptr, t2, t3);
3216 do_pred_flags(t2);
3217
3218 tcg_temp_free_ptr(ptr);
3219 tcg_temp_free_i32(t2);
3220 tcg_temp_free_i32(t3);
3221 return true;
3222}
3223
3224/*
3225 *** SVE Integer Wide Immediate - Unpredicated Group
3226 */
3227
3228static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3229{
3230 if (a->esz == 0) {
3231 return false;
3232 }
3233 if (sve_access_check(s)) {
3234 unsigned vsz = vec_full_reg_size(s);
3235 int dofs = vec_full_reg_offset(s, a->rd);
3236 uint64_t imm;
3237
3238 /* Decode the VFP immediate. */
3239 imm = vfp_expand_imm(a->esz, a->imm);
3240 imm = dup_const(a->esz, imm);
3241
3242 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3243 }
3244 return true;
3245}
3246
3247static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3248{
3249 if (a->esz == 0 && extract32(insn, 13, 1)) {
3250 return false;
3251 }
3252 if (sve_access_check(s)) {
3253 unsigned vsz = vec_full_reg_size(s);
3254 int dofs = vec_full_reg_offset(s, a->rd);
3255
3256 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3257 }
3258 return true;
3259}
3260
3261static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3262{
3263 if (a->esz == 0 && extract32(insn, 13, 1)) {
3264 return false;
3265 }
3266 if (sve_access_check(s)) {
3267 unsigned vsz = vec_full_reg_size(s);
3268 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3269 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3270 }
3271 return true;
3272}
3273
3274static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3275{
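    /* Subtraction of an immediate folds to addition of its negation;
     * wraparound at the element size makes the two exactly equivalent.
     */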
3276 a->imm = -a->imm;
3277 return trans_ADD_zzi(s, a, insn);
3278}
3279
3280static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3281{
3282 static const GVecGen2s op[4] = {
3283 { .fni8 = tcg_gen_vec_sub8_i64,
3284 .fniv = tcg_gen_sub_vec,
3285 .fno = gen_helper_sve_subri_b,
3286 .opc = INDEX_op_sub_vec,
3287 .vece = MO_8,
3288 .scalar_first = true },
3289 { .fni8 = tcg_gen_vec_sub16_i64,
3290 .fniv = tcg_gen_sub_vec,
3291 .fno = gen_helper_sve_subri_h,
3292 .opc = INDEX_op_sub_vec,
3293 .vece = MO_16,
3294 .scalar_first = true },
3295 { .fni4 = tcg_gen_sub_i32,
3296 .fniv = tcg_gen_sub_vec,
3297 .fno = gen_helper_sve_subri_s,
3298 .opc = INDEX_op_sub_vec,
3299 .vece = MO_32,
3300 .scalar_first = true },
3301 { .fni8 = tcg_gen_sub_i64,
3302 .fniv = tcg_gen_sub_vec,
3303 .fno = gen_helper_sve_subri_d,
3304 .opc = INDEX_op_sub_vec,
3305 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3306 .vece = MO_64,
3307 .scalar_first = true }
3308 };
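    /* Because .scalar_first is set, the expansion computes imm - Zn
     * (the reversed subtract that SUBR requires) rather than Zn - imm.
     */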
3309
3310 if (a->esz == 0 && extract32(insn, 13, 1)) {
3311 return false;
3312 }
3313 if (sve_access_check(s)) {
3314 unsigned vsz = vec_full_reg_size(s);
3315 TCGv_i64 c = tcg_const_i64(a->imm);
3316 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3317 vec_full_reg_offset(s, a->rn),
3318 vsz, vsz, c, &op[a->esz]);
3319 tcg_temp_free_i64(c);
3320 }
3321 return true;
3322}
3323
3324static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3325{
3326 if (sve_access_check(s)) {
3327 unsigned vsz = vec_full_reg_size(s);
3328 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3329 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3330 }
3331 return true;
3332}
3333
3334static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3335 bool u, bool d)
3336{
3337 if (a->esz == 0 && extract32(insn, 13, 1)) {
3338 return false;
3339 }
3340 if (sve_access_check(s)) {
3341 TCGv_i64 val = tcg_const_i64(a->imm);
3342 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3343 tcg_temp_free_i64(val);
3344 }
3345 return true;
3346}
3347
3348static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3349{
3350 return do_zzi_sat(s, a, insn, false, false);
3351}
3352
3353static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3354{
3355 return do_zzi_sat(s, a, insn, true, false);
3356}
3357
3358static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3359{
3360 return do_zzi_sat(s, a, insn, false, true);
3361}
3362
3363static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3364{
3365 return do_zzi_sat(s, a, insn, true, true);
3366}
3367
3368static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3369{
3370 if (sve_access_check(s)) {
3371 unsigned vsz = vec_full_reg_size(s);
3372 TCGv_i64 c = tcg_const_i64(a->imm);
3373
3374 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3375 vec_full_reg_offset(s, a->rn),
3376 c, vsz, vsz, 0, fn);
3377 tcg_temp_free_i64(c);
3378 }
3379 return true;
3380}
3381
3382#define DO_ZZI(NAME, name) \
3383static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3384 uint32_t insn) \
3385{ \
3386 static gen_helper_gvec_2i * const fns[4] = { \
3387 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3388 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3389 }; \
3390 return do_zzi_ool(s, a, fns[a->esz]); \
3391}
3392
3393DO_ZZI(SMAX, smax)
3394DO_ZZI(UMAX, umax)
3395DO_ZZI(SMIN, smin)
3396DO_ZZI(UMIN, umin)
3397
3398#undef DO_ZZI
3399
3400/*
3401 *** SVE Floating Point Accumulating Reduction Group
3402 */
3403
3404static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3405{
3406 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3407 TCGv_ptr, TCGv_ptr, TCGv_i32);
3408 static fadda_fn * const fns[3] = {
3409 gen_helper_sve_fadda_h,
3410 gen_helper_sve_fadda_s,
3411 gen_helper_sve_fadda_d,
3412 };
3413 unsigned vsz = vec_full_reg_size(s);
3414 TCGv_ptr t_rm, t_pg, t_fpst;
3415 TCGv_i64 t_val;
3416 TCGv_i32 t_desc;
3417
3418 if (a->esz == 0) {
3419 return false;
3420 }
3421 if (!sve_access_check(s)) {
3422 return true;
3423 }
3424
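    /* FADDA is an ordered reduction: elements are accumulated strictly
     * in increasing element order, so it threads a scalar accumulator
     * through an out-of-line helper instead of using a gvec expansion.
     */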
3425 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3426 t_rm = tcg_temp_new_ptr();
3427 t_pg = tcg_temp_new_ptr();
3428 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3429 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3430 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3431 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3432
3433 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3434
3435 tcg_temp_free_i32(t_desc);
3436 tcg_temp_free_ptr(t_fpst);
3437 tcg_temp_free_ptr(t_pg);
3438 tcg_temp_free_ptr(t_rm);
3439
3440 write_fp_dreg(s, a->rd, t_val);
3441 tcg_temp_free_i64(t_val);
3442 return true;
3443}
3444
3445/*
3446 *** SVE Floating Point Arithmetic - Unpredicated Group
3447 */
3448
3449static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3450 gen_helper_gvec_3_ptr *fn)
3451{
3452 if (fn == NULL) {
3453 return false;
3454 }
3455 if (sve_access_check(s)) {
3456 unsigned vsz = vec_full_reg_size(s);
3457 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3458 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3459 vec_full_reg_offset(s, a->rn),
3460 vec_full_reg_offset(s, a->rm),
3461 status, vsz, vsz, 0, fn);
3462 tcg_temp_free_ptr(status);
3463 }
3464 return true;
3465}
3466
3467
3468#define DO_FP3(NAME, name) \
3469static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3470{ \
3471 static gen_helper_gvec_3_ptr * const fns[4] = { \
3472 NULL, gen_helper_gvec_##name##_h, \
3473 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3474 }; \
3475 return do_zzz_fp(s, a, fns[a->esz]); \
3476}
3477
3478DO_FP3(FADD_zzz, fadd)
3479DO_FP3(FSUB_zzz, fsub)
3480DO_FP3(FMUL_zzz, fmul)
3481DO_FP3(FTSMUL, ftsmul)
3482DO_FP3(FRECPS, recps)
3483DO_FP3(FRSQRTS, rsqrts)
3484
3485#undef DO_FP3
3486
3487/*
3488 *** SVE Floating Point Arithmetic - Predicated Group
3489 */
3490
3491static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3492 gen_helper_gvec_4_ptr *fn)
3493{
3494 if (fn == NULL) {
3495 return false;
3496 }
3497 if (sve_access_check(s)) {
3498 unsigned vsz = vec_full_reg_size(s);
3499 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3500 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3501 vec_full_reg_offset(s, a->rn),
3502 vec_full_reg_offset(s, a->rm),
3503 pred_full_reg_offset(s, a->pg),
3504 status, vsz, vsz, 0, fn);
3505 tcg_temp_free_ptr(status);
3506 }
3507 return true;
3508}
3509
3510#define DO_FP3(NAME, name) \
3511static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3512{ \
3513 static gen_helper_gvec_4_ptr * const fns[4] = { \
3514 NULL, gen_helper_sve_##name##_h, \
3515 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3516 }; \
3517 return do_zpzz_fp(s, a, fns[a->esz]); \
3518}
3519
3520DO_FP3(FADD_zpzz, fadd)
3521DO_FP3(FSUB_zpzz, fsub)
3522DO_FP3(FMUL_zpzz, fmul)
3523DO_FP3(FMIN_zpzz, fmin)
3524DO_FP3(FMAX_zpzz, fmax)
3525DO_FP3(FMINNM_zpzz, fminnum)
3526DO_FP3(FMAXNM_zpzz, fmaxnum)
3527DO_FP3(FABD, fabd)
3528DO_FP3(FSCALE, fscalbn)
3529DO_FP3(FDIV, fdiv)
3530DO_FP3(FMULX, fmulx)
3531
3532#undef DO_FP3
3533
3534typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3535
3536static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3537{
3538 if (fn == NULL) {
3539 return false;
3540 }
3541 if (!sve_access_check(s)) {
3542 return true;
3543 }
3544
3545 unsigned vsz = vec_full_reg_size(s);
3546 unsigned desc;
3547 TCGv_i32 t_desc;
3548 TCGv_ptr pg = tcg_temp_new_ptr();
3549
3550 /* We would need 7 operands to pass these arguments "properly".
3551 * So we encode all the register numbers into the descriptor.
3552 */
3553 desc = deposit32(a->rd, 5, 5, a->rn);
3554 desc = deposit32(desc, 10, 5, a->rm);
3555 desc = deposit32(desc, 15, 5, a->ra);
3556 desc = simd_desc(vsz, vsz, desc);
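    /* An illustrative packing: rd == 1, rn == 2, rm == 3, ra == 4 gives
     * a data field of (4 << 15) | (3 << 10) | (2 << 5) | 1 == 0x20c41.
     */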
3557
3558 t_desc = tcg_const_i32(desc);
3559 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3560 fn(cpu_env, pg, t_desc);
3561 tcg_temp_free_i32(t_desc);
3562 tcg_temp_free_ptr(pg);
3563 return true;
3564}
3565
3566#define DO_FMLA(NAME, name) \
3567static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3568{ \
3569 static gen_helper_sve_fmla * const fns[4] = { \
3570 NULL, gen_helper_sve_##name##_h, \
3571 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3572 }; \
3573 return do_fmla(s, a, fns[a->esz]); \
3574}
3575
3576DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3577DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3578DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3579DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3580
3581#undef DO_FMLA
3582
3583/*
3584 *** SVE Floating Point Unary Operations Predicated Group
3585 */
3586
3587static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3588 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3589{
3590 if (sve_access_check(s)) {
3591 unsigned vsz = vec_full_reg_size(s);
3592 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3593 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3594 vec_full_reg_offset(s, rn),
3595 pred_full_reg_offset(s, pg),
3596 status, vsz, vsz, 0, fn);
3597 tcg_temp_free_ptr(status);
3598 }
3599 return true;
3600}
3601
3602static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3603{
3604 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3605}
3606
3607static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3608{
3609 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3610}
3611
3612static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3613{
3614 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3615}
3616
3617static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3618{
3619 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3620}
3621
3622static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3623{
3624 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3625}
3626
3627static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3628{
3629 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3630}
3631
3632static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3633{
3634 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3635}
3636
3637static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3638{
3639 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3640}
3641
3642static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3643{
3644 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3645}
3646
3647static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3648{
3649 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3650}
3651
3652static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3653{
3654 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3655}
3656
3657static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3658{
3659 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3660}
3661
3662static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3663{
3664 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3665}
3666
3667static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3668{
3669 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3670}
3671
3672/*
3673 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3674 */
3675
3676/* Subroutine loading a vector register at VOFS of LEN bytes.
3677 * The load should begin at the address Rn + IMM.
3678 */
3679
3680static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3681 int rn, int imm)
3682{
3683 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3684 uint32_t len_remain = len % 8;
3685 uint32_t nparts = len / 8 + ctpop8(len_remain);
3686 int midx = get_mem_index(s);
3687 TCGv_i64 addr, t0, t1;
3688
3689 addr = tcg_temp_new_i64();
3690 t0 = tcg_temp_new_i64();
3691
3692 /* Note that unpredicated loads/stores of vector/predicate registers
3693 * are defined as a stream of bytes, which equates to little-endian
3694 * operations on larger quantities. There is no nice way to force
3695 * a little-endian load for aarch64_be-linux-user out of line.
3696 *
3697 * Attempt to keep code expansion to a minimum by limiting the
3698 * amount of unrolling done.
3699 */
3700 if (nparts <= 4) {
3701 int i;
3702
3703 for (i = 0; i < len_align; i += 8) {
3704 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3705 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3706 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3707 }
3708 } else {
3709 TCGLabel *loop = gen_new_label();
3710 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3711
3712 gen_set_label(loop);
3713
3714 /* Minimize the number of local temps that must be re-read from
3715 * the stack each iteration. Instead, re-compute values other
3716 * than the loop counter.
3717 */
3718 tp = tcg_temp_new_ptr();
3719 tcg_gen_addi_ptr(tp, i, imm);
3720 tcg_gen_extu_ptr_i64(addr, tp);
3721 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3722
3723 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3724
3725 tcg_gen_add_ptr(tp, cpu_env, i);
3726 tcg_gen_addi_ptr(i, i, 8);
3727 tcg_gen_st_i64(t0, tp, vofs);
3728 tcg_temp_free_ptr(tp);
3729
3730 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3731 tcg_temp_free_ptr(i);
3732 }
3733
3734 /* Predicate register loads can be any multiple of 2.
3735 * Note that we still store the entire 64-bit unit into cpu_env.
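 * E.g. a 384-bit VL yields a 6-byte predicate: len_align == 0 and
 * len_remain == 6, which the case-6 path below assembles from a
 * 4-byte and a 2-byte load.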
3736 */
3737 if (len_remain) {
3738 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3739
3740 switch (len_remain) {
3741 case 2:
3742 case 4:
3743 case 8:
3744 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3745 break;
3746
3747 case 6:
3748 t1 = tcg_temp_new_i64();
3749 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3750 tcg_gen_addi_i64(addr, addr, 4);
3751 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3752 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3753 tcg_temp_free_i64(t1);
3754 break;
3755
3756 default:
3757 g_assert_not_reached();
3758 }
3759 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3760 }
3761 tcg_temp_free_i64(addr);
3762 tcg_temp_free_i64(t0);
3763}
3764
3765/* Similarly for stores. */
3766static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3767 int rn, int imm)
3768{
3769 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3770 uint32_t len_remain = len % 8;
3771 uint32_t nparts = len / 8 + ctpop8(len_remain);
3772 int midx = get_mem_index(s);
3773 TCGv_i64 addr, t0;
3774
3775 addr = tcg_temp_new_i64();
3776 t0 = tcg_temp_new_i64();
3777
3778 /* Note that unpredicated loads/stores of vector/predicate registers
3779 * are defined as a stream of bytes, which equates to little-endian
3780 * operations on larger quantities. There is no nice way to force
3781 * a little-endian store for aarch64_be-linux-user out of line.
3782 *
3783 * Attempt to keep code expansion to a minimum by limiting the
3784 * amount of unrolling done.
3785 */
3786 if (nparts <= 4) {
3787 int i;
3788
3789 for (i = 0; i < len_align; i += 8) {
3790 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3791 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3792 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3793 }
3794 } else {
3795 TCGLabel *loop = gen_new_label();
3796 TCGv_ptr t2, i = tcg_const_local_ptr(0);
3797
3798 gen_set_label(loop);
3799
3800 t2 = tcg_temp_new_ptr();
3801 tcg_gen_add_ptr(t2, cpu_env, i);
3802 tcg_gen_ld_i64(t0, t2, vofs);
3803
3804 /* Minimize the number of local temps that must be re-read from
3805 * the stack each iteration. Instead, re-compute values other
3806 * than the loop counter.
3807 */
3808 tcg_gen_addi_ptr(t2, i, imm);
3809 tcg_gen_extu_ptr_i64(addr, t2);
3810 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3811 tcg_temp_free_ptr(t2);
3812
3813 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3814
3815 tcg_gen_addi_ptr(i, i, 8);
3816
3817 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3818 tcg_temp_free_ptr(i);
3819 }
3820
3821 /* Predicate register stores can be any multiple of 2. */
3822 if (len_remain) {
3823 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3824 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3825
3826 switch (len_remain) {
3827 case 2:
3828 case 4:
3829 case 8:
3830 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3831 break;
3832
3833 case 6:
3834 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
3835 tcg_gen_addi_i64(addr, addr, 4);
3836 tcg_gen_shri_i64(t0, t0, 32);
3837 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
3838 break;
3839
3840 default:
3841 g_assert_not_reached();
3842 }
3843 }
3844 tcg_temp_free_i64(addr);
3845 tcg_temp_free_i64(t0);
3846}
3847
3848static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3849{
3850 if (sve_access_check(s)) {
3851 int size = vec_full_reg_size(s);
3852 int off = vec_full_reg_offset(s, a->rd);
3853 do_ldr(s, off, size, a->rn, a->imm * size);
3854 }
3855 return true;
3856}
3857
3858static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3859{
3860 if (sve_access_check(s)) {
3861 int size = pred_full_reg_size(s);
3862 int off = pred_full_reg_offset(s, a->rd);
3863 do_ldr(s, off, size, a->rn, a->imm * size);
3864 }
3865 return true;
3866}
3867
3868static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3869{
3870 if (sve_access_check(s)) {
3871 int size = vec_full_reg_size(s);
3872 int off = vec_full_reg_offset(s, a->rd);
3873 do_str(s, off, size, a->rn, a->imm * size);
3874 }
3875 return true;
3876}
3877
3878static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3879{
3880 if (sve_access_check(s)) {
3881 int size = pred_full_reg_size(s);
3882 int off = pred_full_reg_offset(s, a->rd);
3883 do_str(s, off, size, a->rn, a->imm * size);
3884 }
3885 return true;
3886}
3887
3888/*
3889 *** SVE Memory - Contiguous Load Group
3890 */
3891
3892/* The memory mode of the dtype. */
3893static const TCGMemOp dtype_mop[16] = {
3894 MO_UB, MO_UB, MO_UB, MO_UB,
3895 MO_SL, MO_UW, MO_UW, MO_UW,
3896 MO_SW, MO_SW, MO_UL, MO_UL,
3897 MO_SB, MO_SB, MO_SB, MO_Q
3898};
3899
3900#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3901
3902/* The vector element size of dtype. */
3903static const uint8_t dtype_esz[16] = {
3904 0, 1, 2, 3,
3905 3, 1, 2, 3,
3906 3, 2, 2, 3,
3907 3, 2, 1, 3
3908};
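/* Reading the two tables together, e.g.: dtype 4 is LD1SW, a signed
 * 32-bit load (MO_SL) widened into 64-bit elements (esz 3); dtype 5 is
 * LD1H, an unsigned 16-bit load (MO_UW) into 16-bit elements (esz 1).
 */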
3909
3910static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3911 gen_helper_gvec_mem *fn)
3912{
3913 unsigned vsz = vec_full_reg_size(s);
3914 TCGv_ptr t_pg;
3915 TCGv_i32 desc;
3916
3917 /* For e.g. LD4, there are not enough arguments to pass all 4
3918 * registers as pointers, so encode the regno into the data field.
3919 * For consistency, do this even for LD1.
3920 */
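    /* E.g. for zt == 5 the helper recovers the register number with
     * simd_data(desc) and addresses zregs[5] through env directly.
     */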
3921 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3922 t_pg = tcg_temp_new_ptr();
3923
3924 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3925 fn(cpu_env, t_pg, addr, desc);
3926
3927 tcg_temp_free_ptr(t_pg);
3928 tcg_temp_free_i32(desc);
3929}
3930
3931static void do_ld_zpa(DisasContext *s, int zt, int pg,
3932 TCGv_i64 addr, int dtype, int nreg)
3933{
3934 static gen_helper_gvec_mem * const fns[16][4] = {
3935 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3936 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3937 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3938 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3939 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3940
3941 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3942 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3943 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3944 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3945 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3946
3947 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3948 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3949 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3950 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3951 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3952
3953 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3954 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3955 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3956 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3957 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3958 };
3959 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3960
3961 /* While there are holes in the table, they are not
3962 * accessible via the instruction encoding.
3963 */
3964 assert(fn != NULL);
3965 do_mem_zpa(s, zt, pg, addr, fn);
3966}
3967
3968static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3969{
3970 if (a->rm == 31) {
3971 return false;
3972 }
3973 if (sve_access_check(s)) {
3974 TCGv_i64 addr = new_tmp_a64(s);
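        /* Scale the scalar index by the memory access size only (the
         * LSL #msz of the assembly syntax); it is not multiplied by
         * the number of registers being loaded.
         */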
3975 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3976 (a->nreg + 1) << dtype_msz(a->dtype));
3977 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3978 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3979 }
3980 return true;
3981}
3982
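/* Illustrative example: for LD1H { z0.h }, p0/z, [x1, x2, LSL #1],
 * dtype_msz is 1 and the effective address is x1 + (x2 << 1).  An LD4H
 * from the same base uses the same scaling; the four registers' data is
 * interleaved element by element from that address onward.
 */
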
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

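/* Worked example (illustrative): with a 256-bit vector (vsz = 32),
 * LD1D { z0.d }, p0/z, [x1, #2, MUL VL] has elements = 32 >> 3 = 4 and
 * offset = (2 * 4 * 1) << 3 = 64 bytes, i.e. exactly two vector lengths.
 */
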
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldff1bb_r,
        gen_helper_sve_ldff1bhu_r,
        gen_helper_sve_ldff1bsu_r,
        gen_helper_sve_ldff1bdu_r,

        gen_helper_sve_ldff1sds_r,
        gen_helper_sve_ldff1hh_r,
        gen_helper_sve_ldff1hsu_r,
        gen_helper_sve_ldff1hdu_r,

        gen_helper_sve_ldff1hds_r,
        gen_helper_sve_ldff1hss_r,
        gen_helper_sve_ldff1ss_r,
        gen_helper_sve_ldff1sdu_r,

        gen_helper_sve_ldff1bds_r,
        gen_helper_sve_ldff1bss_r,
        gen_helper_sve_ldff1bhs_r,
        gen_helper_sve_ldff1dd_r,
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}

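/* Background note (illustrative): first-fault (LDFF1) and non-fault
 * (LDNF1, below) loads share the contiguous-load calling convention.
 * Architecturally, only the first active element of LDFF1 may take a
 * fault, and LDNF1 may take none; a suppressed fault instead clears the
 * corresponding FFR bits, so software can see how far the load went.
 */
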
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldnf1bb_r,
        gen_helper_sve_ldnf1bhu_r,
        gen_helper_sve_ldnf1bsu_r,
        gen_helper_sve_ldnf1bdu_r,

        gen_helper_sve_ldnf1sds_r,
        gen_helper_sve_ldnf1hh_r,
        gen_helper_sve_ldnf1hsu_r,
        gen_helper_sve_ldnf1hdu_r,

        gen_helper_sve_ldnf1hds_r,
        gen_helper_sve_ldnf1hss_r,
        gen_helper_sve_ldnf1ss_r,
        gen_helper_sve_ldnf1sdu_r,

        gen_helper_sve_ldnf1bds_r,
        gen_helper_sve_ldnf1bss_r,
        gen_helper_sve_ldnf1bhs_r,
        gen_helper_sve_ldnf1dd_r,
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}

static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[4] = {
        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = tcg_const_i32(simd_desc(16, 16, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fns[msz](cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}

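/* Illustrative note: the leading "4" passed to tcg_gen_gvec_dup_mem in
 * do_ldrq is a vector element size of 2**4 = 16 bytes, so the quadword
 * just loaded at offset 0 is splatted across the remainder of the
 * register; with a 512-bit vector the quadword appears four times.
 */
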
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}

static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}

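/* Illustrative example: LD1RQD { z0.d }, p0/z, [x0, #32] encodes
 * a->imm = 2 (the immediate is in multiples of 16 bytes), loads one
 * quadword from x0 + 32, and replicates it to fill the vector.
 */
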
/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  The immediate is scaled by the memory element
     * size, not by the size of the destination elements.
     */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn),
                     a->imm << dtype_msz(a->dtype));
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}

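/* Illustrative example: LD1RB { z0.d }, p0/z, [x1, #3] loads a single
 * byte from x1 + 3 (the immediate scales with the one-byte memory size,
 * not the 8-byte elements), broadcasts it zero-extended to all elements,
 * and then zeroes the elements that the predicate leaves inactive.
 */
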
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[4][4] = {
        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
        { NULL, gen_helper_sve_st1hh_r,
          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
        { NULL, NULL,
          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
    };
    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
    };
    gen_helper_gvec_mem *fn;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}

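/* Illustrative example: ST1B { z0.s }, p0, [x0, x1] selects
 * fn_single[0][2], a truncating store that writes the low byte of each
 * 32-bit element; the msz <= esz checks below mirror the extending
 * forms on the load side.
 */
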
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* As for loads, the scalar offset scales with the memory
         * element size alone, not the number of registers stored.
         */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}