/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

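/*
 * Worked example (illustrative values, not from the source): a
 * byte-sized shift has tsz:imm3 == 0b0001:010, so x == 0b0001010 == 10.
 * tszimm_esz discards imm3 and finds the highest bit of tsz == 0b0001,
 * giving esz == 0; tszimm_shr then yields (16 << 0) - 10 == 6 and
 * tszimm_shl yields 10 - (8 << 0) == 2.
 */
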
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

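/* E.g. (illustrative) x == 0x1ab has the SH bit set, so the signed
 * form yields (int8_t)0xab << 8 == 0xffffab00 and the unsigned form
 * yields 0xab00; without the SH bit the low byte is used unshifted.
 */
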
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. the SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

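/* E.g. msz == 2 (32-bit accesses) maps to dtype == 10, the unsigned
 * 32-bit row of the dtype table; the pattern is simply msz * 5.
 */
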
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

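/* E.g. at a vector length of 2048 bits, s->sve_len is 256 bytes and
 * each predicate register is 256 / 8 == 32 bytes, one bit per byte
 * of vector.
 */
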
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

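/* E.g. (illustrative) a 24-byte predicate, from a 1536-bit vector
 * length, is rounded up to 32 bytes here, since gvec expanders operate
 * on 8 bytes or multiples of 16.
 */
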
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

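/* (In QEMU's flag representation NF is tested by bit 31, CF holds the
 * carry directly, and ZF is zero exactly when Z is set; the predicate
 * helpers pack their result to match, with V always clear.)
 */
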
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

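/* E.g. for 32-bit elements (esz == 2) only every fourth predicate bit
 * is significant, hence the 0x1111... mask: predicate bit 4 * i
 * governs element i.
 */
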
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

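/* For instance, DO_ZPZZ(ADD, add) below expands to trans_ADD_zpzz,
 * which dispatches on a->esz to gen_helper_sve_add_zpzz_{b,h,s,d}.
 */
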
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

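/* E.g. for esz == 0, an immediate of 8 (shift by the full byte width)
 * is clamped to 7 above, which yields the same result for an
 * arithmetic right shift.
 */
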
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

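/* When the whole predicate fits in one 64-bit word (psz == 8, i.e. a
 * 512-bit vector length), the operation, store and flags test above
 * all run on 64-bit temporaries rather than through the general gvec
 * expansion.
 */
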
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

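/* Worked example (illustrative): with a 32-byte vector and esz == 2
 * there are 8 word elements, so POW2 and ALL give 8, VL7 gives 7,
 * MUL3 gives 6, and VL16 gives 0 because the bound exceeds 8.
 */
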
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

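/* E.g. an unsigned increment (u, !d) of reg == 0xfffffff0 by 0x20
 * produces 0x1_0000_0010 in 64-bit arithmetic, which compares greater
 * than the UINT32_MAX bound, so the movcond clamps it to 0xffffffff.
 */
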
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t0);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

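/* The signed cases above use the classic xor trick: for the addition,
 * overflow occurred iff the operands had equal signs and the result's
 * sign differs, i.e. bit 63 of ((new ^ val) & ~(old ^ val)) is set,
 * which is what the andc computes; the movcond then selects the
 * saturated bound whenever that sign bit is set.
 */
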
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

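/* E.g. with vsz == 32 and imm == 5, EXT produces bytes 5..31 of Zn
 * followed by bytes 0..4 of Zm; the move-based fast path above applies
 * only when both pieces are already gvec-sized.
 */
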
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

2023static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2024{
2025 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2026 static gen_insr * const fns[4] = {
2027 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2028 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2029 };
2030 unsigned vsz = vec_full_reg_size(s);
2031 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2032 TCGv_ptr t_zd = tcg_temp_new_ptr();
2033 TCGv_ptr t_zn = tcg_temp_new_ptr();
2034
2035 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2036 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2037
2038 fns[a->esz](t_zd, t_zn, val, desc);
2039
2040 tcg_temp_free_ptr(t_zd);
2041 tcg_temp_free_ptr(t_zn);
2042 tcg_temp_free_i32(desc);
2043}
2044
2045static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2046{
2047 if (sve_access_check(s)) {
2048 TCGv_i64 t = tcg_temp_new_i64();
2049 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2050 do_insr_i64(s, a, t);
2051 tcg_temp_free_i64(t);
2052 }
2053 return true;
2054}
2055
2056static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2057{
2058 if (sve_access_check(s)) {
2059 do_insr_i64(s, a, cpu_reg(s, a->rm));
2060 }
2061 return true;
2062}
2063
2064static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2065{
2066 static gen_helper_gvec_2 * const fns[4] = {
2067 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2068 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2069 };
2070
2071 if (sve_access_check(s)) {
2072 unsigned vsz = vec_full_reg_size(s);
2073 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2074 vec_full_reg_offset(s, a->rn),
2075 vsz, vsz, 0, fns[a->esz]);
2076 }
2077 return true;
2078}
2079
2080static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2081{
2082 static gen_helper_gvec_3 * const fns[4] = {
2083 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2084 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2085 };
2086
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vec_full_reg_offset(s, a->rm),
2092 vsz, vsz, 0, fns[a->esz]);
2093 }
2094 return true;
2095}
2096
2097static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2098{
2099 static gen_helper_gvec_2 * const fns[4][2] = {
2100 { NULL, NULL },
2101 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2102 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2103 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2104 };
2105
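    /* Note that esz is the size of the *destination* elements; the
     * source elements are half that size, taken from the low or high
     * half of Zn according to a->h.
     */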
2106 if (a->esz == 0) {
2107 return false;
2108 }
2109 if (sve_access_check(s)) {
2110 unsigned vsz = vec_full_reg_size(s);
2111 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2112 vec_full_reg_offset(s, a->rn)
2113 + (a->h ? vsz / 2 : 0),
2114 vsz, vsz, 0, fns[a->esz][a->u]);
2115 }
2116 return true;
2117}
2118
2119/*
2120 *** SVE Permute - Predicates Group
2121 */
2122
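/* The predicate permute helpers below take a hand-built descriptor:
 * the low bits carry the exact predicate size (stored as vsz - 2),
 * with esz and the high/odd flag deposited above SIMD_DATA_SHIFT.
 */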
2123static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2124 gen_helper_gvec_3 *fn)
2125{
2126 if (!sve_access_check(s)) {
2127 return true;
2128 }
2129
2130 unsigned vsz = pred_full_reg_size(s);
2131
2132 /* Predicate sizes may be smaller and cannot use simd_desc.
2133 We cannot round up, as we do elsewhere, because we need
2134 the exact size for ZIP2 and REV. We retain the style for
2135 the other helpers for consistency. */
2136 TCGv_ptr t_d = tcg_temp_new_ptr();
2137 TCGv_ptr t_n = tcg_temp_new_ptr();
2138 TCGv_ptr t_m = tcg_temp_new_ptr();
2139 TCGv_i32 t_desc;
2140 int desc;
2141
2142 desc = vsz - 2;
2143 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2144 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2145
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2148 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2149 t_desc = tcg_const_i32(desc);
2150
2151 fn(t_d, t_n, t_m, t_desc);
2152
2153 tcg_temp_free_ptr(t_d);
2154 tcg_temp_free_ptr(t_n);
2155 tcg_temp_free_ptr(t_m);
2156 tcg_temp_free_i32(t_desc);
2157 return true;
2158}
2159
2160static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2161 gen_helper_gvec_2 *fn)
2162{
2163 if (!sve_access_check(s)) {
2164 return true;
2165 }
2166
2167 unsigned vsz = pred_full_reg_size(s);
2168 TCGv_ptr t_d = tcg_temp_new_ptr();
2169 TCGv_ptr t_n = tcg_temp_new_ptr();
2170 TCGv_i32 t_desc;
2171 int desc;
2172
2173 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2174 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2175
2176 /* Predicate sizes may be smaller and cannot use simd_desc.
2177 We cannot round up, as we do elsewhere, because we need
2178 the exact size for ZIP2 and REV. We retain the style for
2179 the other helpers for consistency. */
2180
2181 desc = vsz - 2;
2182 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2183 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2184 t_desc = tcg_const_i32(desc);
2185
2186 fn(t_d, t_n, t_desc);
2187
2188 tcg_temp_free_i32(t_desc);
2189 tcg_temp_free_ptr(t_d);
2190 tcg_temp_free_ptr(t_n);
2191 return true;
2192}
2193
2194static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195{
2196 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2197}
2198
2199static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200{
2201 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2202}
2203
2204static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205{
2206 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2207}
2208
2209static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2210{
2211 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2212}
2213
2214static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2215{
2216 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2217}
2218
2219static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2220{
2221 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2222}
2223
2224static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2225{
2226 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2227}
2228
2229static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2230{
2231 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2232}
2233
2234static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2235{
2236 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2237}
2238
2239/*
2240 *** SVE Permute - Interleaving Group
2241 */
2242
2243static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2244{
2245 static gen_helper_gvec_3 * const fns[4] = {
2246 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2247 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2248 };
2249
2250 if (sve_access_check(s)) {
2251 unsigned vsz = vec_full_reg_size(s);
2252 unsigned high_ofs = high ? vsz / 2 : 0;
2253 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2254 vec_full_reg_offset(s, a->rn) + high_ofs,
2255 vec_full_reg_offset(s, a->rm) + high_ofs,
2256 vsz, vsz, 0, fns[a->esz]);
2257 }
2258 return true;
2259}
2260
2261static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2262 gen_helper_gvec_3 *fn)
2263{
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
2266 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2267 vec_full_reg_offset(s, a->rn),
2268 vec_full_reg_offset(s, a->rm),
2269 vsz, vsz, data, fn);
2270 }
2271 return true;
2272}
2273
2274static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2275{
2276 return do_zip(s, a, false);
2277}
2278
2279static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280{
2281 return do_zip(s, a, true);
2282}
2283
2284static gen_helper_gvec_3 * const uzp_fns[4] = {
2285 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2286 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2287};
2288
2289static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2290{
2291 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2292}
2293
2294static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295{
2296 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2297}
2298
2299static gen_helper_gvec_3 * const trn_fns[4] = {
2300 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2301 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2302};
2303
2304static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2305{
2306 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2307}
2308
2309static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2310{
2311 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2312}
2313
2314/*
2315 *** SVE Permute Vector - Predicated Group
2316 */
2317
2318static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2319{
2320 static gen_helper_gvec_3 * const fns[4] = {
2321 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2322 };
2323 return do_zpz_ool(s, a, fns[a->esz]);
2324}
2325
2326/* Call the helper that computes the ARM LastActiveElement pseudocode
2327 * function, scaled by the element size. This includes the not found
2328 * indication; e.g. not found for esz=3 is -8.
2329 */
2330static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2331{
2332 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2333 * round up, as we do elsewhere, because we need the exact size.
2334 */
2335 TCGv_ptr t_p = tcg_temp_new_ptr();
2336 TCGv_i32 t_desc;
2337 unsigned vsz = pred_full_reg_size(s);
2338 unsigned desc;
2339
2340 desc = vsz - 2;
2341 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2342
2343 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2344 t_desc = tcg_const_i32(desc);
2345
2346 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2347
2348 tcg_temp_free_i32(t_desc);
2349 tcg_temp_free_ptr(t_p);
2350}
2351
2352/* Increment LAST to the offset of the next element in the vector,
2353 * wrapping around to 0.
2354 */
2355static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2356{
2357 unsigned vsz = vec_full_reg_size(s);
2358
2359 tcg_gen_addi_i32(last, last, 1 << esz);
2360 if (is_power_of_2(vsz)) {
2361 tcg_gen_andi_i32(last, last, vsz - 1);
2362 } else {
2363 TCGv_i32 max = tcg_const_i32(vsz);
2364 TCGv_i32 zero = tcg_const_i32(0);
2365 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2366 tcg_temp_free_i32(max);
2367 tcg_temp_free_i32(zero);
2368 }
2369}
2370
2371/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2372static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373{
2374 unsigned vsz = vec_full_reg_size(s);
2375
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2384 }
2385}
2386
2387/* Load an unsigned element of ESZ from BASE+OFS. */
2388static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2389{
2390 TCGv_i64 r = tcg_temp_new_i64();
2391
2392 switch (esz) {
2393 case 0:
2394 tcg_gen_ld8u_i64(r, base, ofs);
2395 break;
2396 case 1:
2397 tcg_gen_ld16u_i64(r, base, ofs);
2398 break;
2399 case 2:
2400 tcg_gen_ld32u_i64(r, base, ofs);
2401 break;
2402 case 3:
2403 tcg_gen_ld_i64(r, base, ofs);
2404 break;
2405 default:
2406 g_assert_not_reached();
2407 }
2408 return r;
2409}
2410
2411/* Load an unsigned element of ESZ from RM[LAST]. */
2412static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2413 int rm, int esz)
2414{
2415 TCGv_ptr p = tcg_temp_new_ptr();
2416 TCGv_i64 r;
2417
2418 /* Convert the offset within the vector into an offset within ENV.
2419 * The final adjustment for the vector register base
2420 * is added via a constant offset to the load.
2421 */
2422#ifdef HOST_WORDS_BIGENDIAN
2423 /* Adjust for element ordering. See vec_reg_offset. */
2424 if (esz < 3) {
2425 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2426 }
2427#endif
2428 tcg_gen_ext_i32_ptr(p, last);
2429 tcg_gen_add_ptr(p, p, cpu_env);
2430
2431 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2432 tcg_temp_free_ptr(p);
2433
2434 return r;
2435}
2436
2437/* Compute CLAST for a Zreg. */
2438static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2439{
2440 TCGv_i32 last;
2441 TCGLabel *over;
2442 TCGv_i64 ele;
2443 unsigned vsz, esz = a->esz;
2444
2445 if (!sve_access_check(s)) {
2446 return true;
2447 }
2448
2449 last = tcg_temp_local_new_i32();
2450 over = gen_new_label();
2451
2452 find_last_active(s, last, esz, a->pg);
2453
2454 /* There is of course no movcond for a 2048-bit vector,
2455 * so we must branch over the actual store.
2456 */
2457 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2458
2459 if (!before) {
2460 incr_last_active(s, last, esz);
2461 }
2462
2463 ele = load_last_active(s, last, a->rm, esz);
2464 tcg_temp_free_i32(last);
2465
2466 vsz = vec_full_reg_size(s);
2467 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2468 tcg_temp_free_i64(ele);
2469
2470 /* If this insn used MOVPRFX, we may need a second move. */
2471 if (a->rd != a->rn) {
2472 TCGLabel *done = gen_new_label();
2473 tcg_gen_br(done);
2474
2475 gen_set_label(over);
2476 do_mov_z(s, a->rd, a->rn);
2477
2478 gen_set_label(done);
2479 } else {
2480 gen_set_label(over);
2481 }
2482 return true;
2483}
2484
2485static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2486{
2487 return do_clast_vector(s, a, false);
2488}
2489
2490static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2491{
2492 return do_clast_vector(s, a, true);
2493}
2494
2495/* Compute CLAST for a scalar. */
2496static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2497 bool before, TCGv_i64 reg_val)
2498{
2499 TCGv_i32 last = tcg_temp_new_i32();
2500 TCGv_i64 ele, cmp, zero;
2501
2502 find_last_active(s, last, esz, pg);
2503
2504 /* Extend the original value of last prior to incrementing. */
2505 cmp = tcg_temp_new_i64();
2506 tcg_gen_ext_i32_i64(cmp, last);
2507
2508 if (!before) {
2509 incr_last_active(s, last, esz);
2510 }
2511
2512 /* The conceit here is that while last < 0 indicates not found, after
2513 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2514 * from which we can load garbage. We then discard the garbage with
2515 * a conditional move.
2516 */
2517 ele = load_last_active(s, last, rm, esz);
2518 tcg_temp_free_i32(last);
2519
2520 zero = tcg_const_i64(0);
2521 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2522
2523 tcg_temp_free_i64(zero);
2524 tcg_temp_free_i64(cmp);
2525 tcg_temp_free_i64(ele);
2526}
2527
2528/* Compute CLAST for a Vreg. */
2529static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2530{
2531 if (sve_access_check(s)) {
2532 int esz = a->esz;
2533 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2534 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2535
2536 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2537 write_fp_dreg(s, a->rd, reg);
2538 tcg_temp_free_i64(reg);
2539 }
2540 return true;
2541}
2542
2543static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2544{
2545 return do_clast_fp(s, a, false);
2546}
2547
2548static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2549{
2550 return do_clast_fp(s, a, true);
2551}
2552
2553/* Compute CLAST for a Xreg. */
2554static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2555{
2556 TCGv_i64 reg;
2557
2558 if (!sve_access_check(s)) {
2559 return true;
2560 }
2561
2562 reg = cpu_reg(s, a->rd);
2563 switch (a->esz) {
2564 case 0:
2565 tcg_gen_ext8u_i64(reg, reg);
2566 break;
2567 case 1:
2568 tcg_gen_ext16u_i64(reg, reg);
2569 break;
2570 case 2:
2571 tcg_gen_ext32u_i64(reg, reg);
2572 break;
2573 case 3:
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2578
2579 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2580 return true;
2581}
2582
2583static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584{
2585 return do_clast_general(s, a, false);
2586}
2587
2588static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589{
2590 return do_clast_general(s, a, true);
2591}
2592
2593/* Compute LAST for a scalar. */
2594static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2595 int pg, int rm, bool before)
2596{
2597 TCGv_i32 last = tcg_temp_new_i32();
2598 TCGv_i64 ret;
2599
2600 find_last_active(s, last, esz, pg);
2601 if (before) {
2602 wrap_last_active(s, last, esz);
2603 } else {
2604 incr_last_active(s, last, esz);
2605 }
2606
2607 ret = load_last_active(s, last, rm, esz);
2608 tcg_temp_free_i32(last);
2609 return ret;
2610}
2611
2612/* Compute LAST for a Vreg. */
2613static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2614{
2615 if (sve_access_check(s)) {
2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2617 write_fp_dreg(s, a->rd, val);
2618 tcg_temp_free_i64(val);
2619 }
2620 return true;
2621}
2622
2623static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624{
2625 return do_last_fp(s, a, false);
2626}
2627
2628static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629{
2630 return do_last_fp(s, a, true);
2631}
2632
2633/* Compute LAST for a Xreg. */
2634static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2635{
2636 if (sve_access_check(s)) {
2637 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2638 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2639 tcg_temp_free_i64(val);
2640 }
2641 return true;
2642}
2643
2644static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645{
2646 return do_last_general(s, a, false);
2647}
2648
2649static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2650{
2651 return do_last_general(s, a, true);
2652}
2653
2654static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2655{
2656 if (sve_access_check(s)) {
2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2658 }
2659 return true;
2660}
2661
2662static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663{
2664 if (sve_access_check(s)) {
2665 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2666 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2667 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2668 tcg_temp_free_i64(t);
2669 }
2670 return true;
2671}
2672
2673static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674{
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 gen_helper_sve_revb_h,
2678 gen_helper_sve_revb_s,
2679 gen_helper_sve_revb_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682}
2683
2684static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685{
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 NULL,
2688 NULL,
2689 gen_helper_sve_revh_s,
2690 gen_helper_sve_revh_d,
2691 };
2692 return do_zpz_ool(s, a, fns[a->esz]);
2693}
2694
2695static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2696{
2697 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2698}
2699
2700static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701{
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 gen_helper_sve_rbit_b,
2704 gen_helper_sve_rbit_h,
2705 gen_helper_sve_rbit_s,
2706 gen_helper_sve_rbit_d,
2707 };
2708 return do_zpz_ool(s, a, fns[a->esz]);
2709}
2710
2711static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2712{
2713 if (sve_access_check(s)) {
2714 unsigned vsz = vec_full_reg_size(s);
2715 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2716 vec_full_reg_offset(s, a->rn),
2717 vec_full_reg_offset(s, a->rm),
2718 pred_full_reg_offset(s, a->pg),
2719 vsz, vsz, a->esz, gen_helper_sve_splice);
2720 }
2721 return true;
2722}
2723
2724/*
2725 *** SVE Integer Compare - Vectors Group
2726 */
2727
2728static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2729 gen_helper_gvec_flags_4 *gen_fn)
2730{
2731 TCGv_ptr pd, zn, zm, pg;
2732 unsigned vsz;
2733 TCGv_i32 t;
2734
2735 if (gen_fn == NULL) {
2736 return false;
2737 }
2738 if (!sve_access_check(s)) {
2739 return true;
2740 }
2741
2742 vsz = vec_full_reg_size(s);
2743 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2744 pd = tcg_temp_new_ptr();
2745 zn = tcg_temp_new_ptr();
2746 zm = tcg_temp_new_ptr();
2747 pg = tcg_temp_new_ptr();
2748
2749 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2750 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2751 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2752 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2753
2754 gen_fn(t, pd, zn, zm, pg, t);
2755
2756 tcg_temp_free_ptr(pd);
2757 tcg_temp_free_ptr(zn);
2758 tcg_temp_free_ptr(zm);
2759 tcg_temp_free_ptr(pg);
2760
2761 do_pred_flags(t);
2762
2763 tcg_temp_free_i32(t);
2764 return true;
2765}
2766
2767#define DO_PPZZ(NAME, name) \
2768static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2769 uint32_t insn) \
2770{ \
2771 static gen_helper_gvec_flags_4 * const fns[4] = { \
2772 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2773 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2774 }; \
2775 return do_ppzz_flags(s, a, fns[a->esz]); \
2776}
2777
2778DO_PPZZ(CMPEQ, cmpeq)
2779DO_PPZZ(CMPNE, cmpne)
2780DO_PPZZ(CMPGT, cmpgt)
2781DO_PPZZ(CMPGE, cmpge)
2782DO_PPZZ(CMPHI, cmphi)
2783DO_PPZZ(CMPHS, cmphs)
2784
2785#undef DO_PPZZ
2786
2787#define DO_PPZW(NAME, name) \
2788static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2789 uint32_t insn) \
2790{ \
2791 static gen_helper_gvec_flags_4 * const fns[4] = { \
2792 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2793 gen_helper_sve_##name##_ppzw_s, NULL \
2794 }; \
2795 return do_ppzz_flags(s, a, fns[a->esz]); \
2796}
2797
2798DO_PPZW(CMPEQ, cmpeq)
2799DO_PPZW(CMPNE, cmpne)
2800DO_PPZW(CMPGT, cmpgt)
2801DO_PPZW(CMPGE, cmpge)
2802DO_PPZW(CMPHI, cmphi)
2803DO_PPZW(CMPHS, cmphs)
2804DO_PPZW(CMPLT, cmplt)
2805DO_PPZW(CMPLE, cmple)
2806DO_PPZW(CMPLO, cmplo)
2807DO_PPZW(CMPLS, cmpls)
2808
2809#undef DO_PPZW
2810
2811/*
2812 *** SVE Integer Compare - Immediate Groups
2813 */
2814
2815static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2816 gen_helper_gvec_flags_3 *gen_fn)
2817{
2818 TCGv_ptr pd, zn, pg;
2819 unsigned vsz;
2820 TCGv_i32 t;
2821
2822 if (gen_fn == NULL) {
2823 return false;
2824 }
2825 if (!sve_access_check(s)) {
2826 return true;
2827 }
2828
2829 vsz = vec_full_reg_size(s);
2830 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2831 pd = tcg_temp_new_ptr();
2832 zn = tcg_temp_new_ptr();
2833 pg = tcg_temp_new_ptr();
2834
2835 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2836 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2837 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2838
2839 gen_fn(t, pd, zn, pg, t);
2840
2841 tcg_temp_free_ptr(pd);
2842 tcg_temp_free_ptr(zn);
2843 tcg_temp_free_ptr(pg);
2844
2845 do_pred_flags(t);
2846
2847 tcg_temp_free_i32(t);
2848 return true;
2849}
2850
2851#define DO_PPZI(NAME, name) \
2852static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2853 uint32_t insn) \
2854{ \
2855 static gen_helper_gvec_flags_3 * const fns[4] = { \
2856 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2857 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2858 }; \
2859 return do_ppzi_flags(s, a, fns[a->esz]); \
2860}
2861
2862DO_PPZI(CMPEQ, cmpeq)
2863DO_PPZI(CMPNE, cmpne)
2864DO_PPZI(CMPGT, cmpgt)
2865DO_PPZI(CMPGE, cmpge)
2866DO_PPZI(CMPHI, cmphi)
2867DO_PPZI(CMPHS, cmphs)
2868DO_PPZI(CMPLT, cmplt)
2869DO_PPZI(CMPLE, cmple)
2870DO_PPZI(CMPLO, cmplo)
2871DO_PPZI(CMPLS, cmpls)
2872
2873#undef DO_PPZI
2874
2875/*
2876 *** SVE Partition Break Group
2877 */
2878
2879static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2880 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2881{
2882 if (!sve_access_check(s)) {
2883 return true;
2884 }
2885
2886 unsigned vsz = pred_full_reg_size(s);
2887
2888 /* Predicate sizes may be smaller and cannot use simd_desc. */
2889 TCGv_ptr d = tcg_temp_new_ptr();
2890 TCGv_ptr n = tcg_temp_new_ptr();
2891 TCGv_ptr m = tcg_temp_new_ptr();
2892 TCGv_ptr g = tcg_temp_new_ptr();
2893 TCGv_i32 t = tcg_const_i32(vsz - 2);
2894
2895 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2896 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2897 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2898 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2899
2900 if (a->s) {
2901 fn_s(t, d, n, m, g, t);
2902 do_pred_flags(t);
2903 } else {
2904 fn(d, n, m, g, t);
2905 }
2906 tcg_temp_free_ptr(d);
2907 tcg_temp_free_ptr(n);
2908 tcg_temp_free_ptr(m);
2909 tcg_temp_free_ptr(g);
2910 tcg_temp_free_i32(t);
2911 return true;
2912}
2913
2914static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2915 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2916{
2917 if (!sve_access_check(s)) {
2918 return true;
2919 }
2920
2921 unsigned vsz = pred_full_reg_size(s);
2922
2923 /* Predicate sizes may be smaller and cannot use simd_desc. */
2924 TCGv_ptr d = tcg_temp_new_ptr();
2925 TCGv_ptr n = tcg_temp_new_ptr();
2926 TCGv_ptr g = tcg_temp_new_ptr();
2927 TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2932
2933 if (a->s) {
2934 fn_s(t, d, n, g, t);
2935 do_pred_flags(t);
2936 } else {
2937 fn(d, n, g, t);
2938 }
2939 tcg_temp_free_ptr(d);
2940 tcg_temp_free_ptr(n);
2941 tcg_temp_free_ptr(g);
2942 tcg_temp_free_i32(t);
2943 return true;
2944}
2945
2946static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2947{
2948 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2949}
2950
2951static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2952{
2953 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2954}
2955
2956static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957{
2958 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2959}
2960
2961static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962{
2963 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2964}
2965
2966static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2967{
2968 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2969}
2970
2971static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2972{
2973 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2974}
2975
2976static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2977{
2978 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2979}
2980
2981/*
2982 *** SVE Predicate Count Group
2983 */
2984
2985static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2986{
2987 unsigned psz = pred_full_reg_size(s);
2988
2989 if (psz <= 8) {
2990 uint64_t psz_mask;
2991
2992 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2993 if (pn != pg) {
2994 TCGv_i64 g = tcg_temp_new_i64();
2995 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2996 tcg_gen_and_i64(val, val, g);
2997 tcg_temp_free_i64(g);
2998 }
2999
3000 /* Reduce the pred_esz_masks value simply to reduce the
3001 * size of the code generated here.
3002 */
3003 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3004 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
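/* E.g. for esz == 2 only every fourth predicate bit is
 * significant, so the popcount below yields one count per
 * active word element.
 */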
3005
3006 tcg_gen_ctpop_i64(val, val);
3007 } else {
3008 TCGv_ptr t_pn = tcg_temp_new_ptr();
3009 TCGv_ptr t_pg = tcg_temp_new_ptr();
3010 unsigned desc;
3011 TCGv_i32 t_desc;
3012
3013 desc = psz - 2;
3014 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3015
3016 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3017 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3018 t_desc = tcg_const_i32(desc);
3019
3020 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3021 tcg_temp_free_ptr(t_pn);
3022 tcg_temp_free_ptr(t_pg);
3023 tcg_temp_free_i32(t_desc);
3024 }
3025}
3026
3027static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3028{
3029 if (sve_access_check(s)) {
3030 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3031 }
3032 return true;
3033}
3034
3035static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3036 uint32_t insn)
3037{
3038 if (sve_access_check(s)) {
3039 TCGv_i64 reg = cpu_reg(s, a->rd);
3040 TCGv_i64 val = tcg_temp_new_i64();
3041
3042 do_cntp(s, val, a->esz, a->pg, a->pg);
3043 if (a->d) {
3044 tcg_gen_sub_i64(reg, reg, val);
3045 } else {
3046 tcg_gen_add_i64(reg, reg, val);
3047 }
3048 tcg_temp_free_i64(val);
3049 }
3050 return true;
3051}
3052
3053static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3054 uint32_t insn)
3055{
3056 if (a->esz == 0) {
3057 return false;
3058 }
3059 if (sve_access_check(s)) {
3060 unsigned vsz = vec_full_reg_size(s);
3061 TCGv_i64 val = tcg_temp_new_i64();
3062 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3063
3064 do_cntp(s, val, a->esz, a->pg, a->pg);
3065 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3066 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3067 }
3068 return true;
3069}
3070
3071static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3072 uint32_t insn)
3073{
3074 if (sve_access_check(s)) {
3075 TCGv_i64 reg = cpu_reg(s, a->rd);
3076 TCGv_i64 val = tcg_temp_new_i64();
3077
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 do_sat_addsub_32(reg, val, a->u, a->d);
3080 }
3081 return true;
3082}
3083
3084static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3085 uint32_t insn)
3086{
3087 if (sve_access_check(s)) {
3088 TCGv_i64 reg = cpu_reg(s, a->rd);
3089 TCGv_i64 val = tcg_temp_new_i64();
3090
3091 do_cntp(s, val, a->esz, a->pg, a->pg);
3092 do_sat_addsub_64(reg, val, a->u, a->d);
3093 }
3094 return true;
3095}
3096
3097static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3098 uint32_t insn)
3099{
3100 if (a->esz == 0) {
3101 return false;
3102 }
3103 if (sve_access_check(s)) {
3104 TCGv_i64 val = tcg_temp_new_i64();
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3107 }
3108 return true;
3109}
3110
3111/*
3112 *** SVE Integer Compare Scalars Group
3113 */
3114
3115static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3116{
3117 if (!sve_access_check(s)) {
3118 return true;
3119 }
3120
3121 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3122 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3123 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3124 TCGv_i64 cmp = tcg_temp_new_i64();
3125
3126 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3127 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3128 tcg_temp_free_i64(cmp);
3129
3130 /* VF = !NF & !CF. */
3131 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3132 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3133
3134 /* Both NF and VF actually look at bit 31. */
3135 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3136 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3137 return true;
3138}
3139
3140static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3141{
3142 if (!sve_access_check(s)) {
3143 return true;
3144 }
3145
3146 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3147 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3148 TCGv_i64 t0 = tcg_temp_new_i64();
3149 TCGv_i64 t1 = tcg_temp_new_i64();
3150 TCGv_i32 t2, t3;
3151 TCGv_ptr ptr;
3152 unsigned desc, vsz = vec_full_reg_size(s);
3153 TCGCond cond;
3154
3155 if (!a->sf) {
3156 if (a->u) {
3157 tcg_gen_ext32u_i64(op0, op0);
3158 tcg_gen_ext32u_i64(op1, op1);
3159 } else {
3160 tcg_gen_ext32s_i64(op0, op0);
3161 tcg_gen_ext32s_i64(op1, op1);
3162 }
3163 }
3164
3165 /* For the helper, compress the different conditions into a computation
3166 * of how many iterations for which the condition is true.
3167 *
3168 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3169 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3170 * aren't that large, so any value >= predicate size is sufficient.
3171 */
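/* For example, WHILELO with op0 == 5 and op1 == 8 produces three
 * active elements, and WHILELE with op0 == op1 produces one via
 * the a->eq adjustment below.
 */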
3172 tcg_gen_sub_i64(t0, op1, op0);
3173
3174 /* t0 = MIN(op1 - op0, vsz). */
3175 tcg_gen_movi_i64(t1, vsz);
3176 tcg_gen_umin_i64(t0, t0, t1);
3177 if (a->eq) {
3178 /* Equality means one more iteration. */
3179 tcg_gen_addi_i64(t0, t0, 1);
3180 }
3181
3182 /* t0 = (condition true ? t0 : 0). */
3183 cond = (a->u
3184 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3185 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3186 tcg_gen_movi_i64(t1, 0);
3187 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3188
3189 t2 = tcg_temp_new_i32();
3190 tcg_gen_extrl_i64_i32(t2, t0);
3191 tcg_temp_free_i64(t0);
3192 tcg_temp_free_i64(t1);
3193
3194 desc = (vsz / 8) - 2;
3195 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3196 t3 = tcg_const_i32(desc);
3197
3198 ptr = tcg_temp_new_ptr();
3199 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3200
3201 gen_helper_sve_while(t2, ptr, t2, t3);
3202 do_pred_flags(t2);
3203
3204 tcg_temp_free_ptr(ptr);
3205 tcg_temp_free_i32(t2);
3206 tcg_temp_free_i32(t3);
3207 return true;
3208}
3209
3210/*
3211 *** SVE Integer Wide Immediate - Unpredicated Group
3212 */
3213
3214static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3215{
3216 if (a->esz == 0) {
3217 return false;
3218 }
3219 if (sve_access_check(s)) {
3220 unsigned vsz = vec_full_reg_size(s);
3221 int dofs = vec_full_reg_offset(s, a->rd);
3222 uint64_t imm;
3223
3224 /* Decode the VFP immediate. */
3225 imm = vfp_expand_imm(a->esz, a->imm);
3226 imm = dup_const(a->esz, imm);
3227
3228 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3229 }
3230 return true;
3231}
3232
3233static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3234{
3235 if (a->esz == 0 && extract32(insn, 13, 1)) {
3236 return false;
3237 }
3238 if (sve_access_check(s)) {
3239 unsigned vsz = vec_full_reg_size(s);
3240 int dofs = vec_full_reg_offset(s, a->rd);
3241
3242 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3243 }
3244 return true;
3245}
3246
3247static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3248{
3249 if (a->esz == 0 && extract32(insn, 13, 1)) {
3250 return false;
3251 }
3252 if (sve_access_check(s)) {
3253 unsigned vsz = vec_full_reg_size(s);
3254 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3255 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3256 }
3257 return true;
3258}
3259
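/* SUB (immediate) is simply ADD of the negated immediate, since the
 * per-element arithmetic wraps modulo the element size.
 */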
3260static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3261{
3262 a->imm = -a->imm;
3263 return trans_ADD_zzi(s, a, insn);
3264}
3265
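/* SUBR computes imm - Zn, i.e. the scalar is the first operand, hence
 * the .scalar_first expansions below rather than a reuse of ADD.
 */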
3266static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3267{
3268 static const GVecGen2s op[4] = {
3269 { .fni8 = tcg_gen_vec_sub8_i64,
3270 .fniv = tcg_gen_sub_vec,
3271 .fno = gen_helper_sve_subri_b,
3272 .opc = INDEX_op_sub_vec,
3273 .vece = MO_8,
3274 .scalar_first = true },
3275 { .fni8 = tcg_gen_vec_sub16_i64,
3276 .fniv = tcg_gen_sub_vec,
3277 .fno = gen_helper_sve_subri_h,
3278 .opc = INDEX_op_sub_vec,
3279 .vece = MO_16,
3280 .scalar_first = true },
3281 { .fni4 = tcg_gen_sub_i32,
3282 .fniv = tcg_gen_sub_vec,
3283 .fno = gen_helper_sve_subri_s,
3284 .opc = INDEX_op_sub_vec,
3285 .vece = MO_32,
3286 .scalar_first = true },
3287 { .fni8 = tcg_gen_sub_i64,
3288 .fniv = tcg_gen_sub_vec,
3289 .fno = gen_helper_sve_subri_d,
3290 .opc = INDEX_op_sub_vec,
3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3292 .vece = MO_64,
3293 .scalar_first = true }
3294 };
3295
3296 if (a->esz == 0 && extract32(insn, 13, 1)) {
3297 return false;
3298 }
3299 if (sve_access_check(s)) {
3300 unsigned vsz = vec_full_reg_size(s);
3301 TCGv_i64 c = tcg_const_i64(a->imm);
3302 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3303 vec_full_reg_offset(s, a->rn),
3304 vsz, vsz, c, &op[a->esz]);
3305 tcg_temp_free_i64(c);
3306 }
3307 return true;
3308}
3309
3310static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3311{
3312 if (sve_access_check(s)) {
3313 unsigned vsz = vec_full_reg_size(s);
3314 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3315 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3316 }
3317 return true;
3318}
3319
3320static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3321 bool u, bool d)
3322{
3323 if (a->esz == 0 && extract32(insn, 13, 1)) {
3324 return false;
3325 }
3326 if (sve_access_check(s)) {
3327 TCGv_i64 val = tcg_const_i64(a->imm);
3328 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3329 tcg_temp_free_i64(val);
3330 }
3331 return true;
3332}
3333
3334static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3335{
3336 return do_zzi_sat(s, a, insn, false, false);
3337}
3338
3339static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3340{
3341 return do_zzi_sat(s, a, insn, true, false);
3342}
3343
3344static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345{
3346 return do_zzi_sat(s, a, insn, false, true);
3347}
3348
3349static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3350{
3351 return do_zzi_sat(s, a, insn, true, true);
3352}
3353
3354static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3355{
3356 if (sve_access_check(s)) {
3357 unsigned vsz = vec_full_reg_size(s);
3358 TCGv_i64 c = tcg_const_i64(a->imm);
3359
3360 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3361 vec_full_reg_offset(s, a->rn),
3362 c, vsz, vsz, 0, fn);
3363 tcg_temp_free_i64(c);
3364 }
3365 return true;
3366}
3367
3368#define DO_ZZI(NAME, name) \
3369static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3370 uint32_t insn) \
3371{ \
3372 static gen_helper_gvec_2i * const fns[4] = { \
3373 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3374 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3375 }; \
3376 return do_zzi_ool(s, a, fns[a->esz]); \
3377}
3378
3379DO_ZZI(SMAX, smax)
3380DO_ZZI(UMAX, umax)
3381DO_ZZI(SMIN, smin)
3382DO_ZZI(UMIN, umin)
3383
3384#undef DO_ZZI
3385
3386/*
3387 *** SVE Floating Point Accumulating Reduction Group
3388 */
3389
3390static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3391{
3392 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3393 TCGv_ptr, TCGv_ptr, TCGv_i32);
3394 static fadda_fn * const fns[3] = {
3395 gen_helper_sve_fadda_h,
3396 gen_helper_sve_fadda_s,
3397 gen_helper_sve_fadda_d,
3398 };
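/* Note the table above is indexed by esz - 1: FADDA has no byte
 * form, and esz == 0 is rejected below.
 */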
3399 unsigned vsz = vec_full_reg_size(s);
3400 TCGv_ptr t_rm, t_pg, t_fpst;
3401 TCGv_i64 t_val;
3402 TCGv_i32 t_desc;
3403
3404 if (a->esz == 0) {
3405 return false;
3406 }
3407 if (!sve_access_check(s)) {
3408 return true;
3409 }
3410
3411 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3412 t_rm = tcg_temp_new_ptr();
3413 t_pg = tcg_temp_new_ptr();
3414 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3415 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3416 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3417 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3418
3419 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3420
3421 tcg_temp_free_i32(t_desc);
3422 tcg_temp_free_ptr(t_fpst);
3423 tcg_temp_free_ptr(t_pg);
3424 tcg_temp_free_ptr(t_rm);
3425
3426 write_fp_dreg(s, a->rd, t_val);
3427 tcg_temp_free_i64(t_val);
3428 return true;
3429}
3430
3431/*
3432 *** SVE Floating Point Arithmetic - Unpredicated Group
3433 */
3434
3435static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3436 gen_helper_gvec_3_ptr *fn)
3437{
3438 if (fn == NULL) {
3439 return false;
3440 }
3441 if (sve_access_check(s)) {
3442 unsigned vsz = vec_full_reg_size(s);
3443 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3444 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3445 vec_full_reg_offset(s, a->rn),
3446 vec_full_reg_offset(s, a->rm),
3447 status, vsz, vsz, 0, fn);
3448 tcg_temp_free_ptr(status);
3449 }
3450 return true;
3451}
3452
3453
3454#define DO_FP3(NAME, name) \
3455static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3456{ \
3457 static gen_helper_gvec_3_ptr * const fns[4] = { \
3458 NULL, gen_helper_gvec_##name##_h, \
3459 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3460 }; \
3461 return do_zzz_fp(s, a, fns[a->esz]); \
3462}
3463
3464DO_FP3(FADD_zzz, fadd)
3465DO_FP3(FSUB_zzz, fsub)
3466DO_FP3(FMUL_zzz, fmul)
3467DO_FP3(FTSMUL, ftsmul)
3468DO_FP3(FRECPS, recps)
3469DO_FP3(FRSQRTS, rsqrts)
3470
3471#undef DO_FP3
3472
3473/*
3474 *** SVE Floating Point Arithmetic - Predicated Group
3475 */
3476
3477static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3478 gen_helper_gvec_4_ptr *fn)
3479{
3480 if (fn == NULL) {
3481 return false;
3482 }
3483 if (sve_access_check(s)) {
3484 unsigned vsz = vec_full_reg_size(s);
3485 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3486 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3487 vec_full_reg_offset(s, a->rn),
3488 vec_full_reg_offset(s, a->rm),
3489 pred_full_reg_offset(s, a->pg),
3490 status, vsz, vsz, 0, fn);
3491 tcg_temp_free_ptr(status);
3492 }
3493 return true;
3494}
3495
3496#define DO_FP3(NAME, name) \
3497static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3498{ \
3499 static gen_helper_gvec_4_ptr * const fns[4] = { \
3500 NULL, gen_helper_sve_##name##_h, \
3501 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3502 }; \
3503 return do_zpzz_fp(s, a, fns[a->esz]); \
3504}
3505
3506DO_FP3(FADD_zpzz, fadd)
3507DO_FP3(FSUB_zpzz, fsub)
3508DO_FP3(FMUL_zpzz, fmul)
3509DO_FP3(FMIN_zpzz, fmin)
3510DO_FP3(FMAX_zpzz, fmax)
3511DO_FP3(FMINNM_zpzz, fminnum)
3512DO_FP3(FMAXNM_zpzz, fmaxnum)
3513DO_FP3(FABD, fabd)
3514DO_FP3(FSCALE, fscalbn)
3515DO_FP3(FDIV, fdiv)
3516DO_FP3(FMULX, fmulx)
3517
3518#undef DO_FP3
3519
3520typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3521
3522static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3523{
3524 if (fn == NULL) {
3525 return false;
3526 }
3527 if (!sve_access_check(s)) {
3528 return true;
3529 }
3530
3531 unsigned vsz = vec_full_reg_size(s);
3532 unsigned desc;
3533 TCGv_i32 t_desc;
3534 TCGv_ptr pg = tcg_temp_new_ptr();
3535
3536 /* We would need 7 operands to pass these arguments "properly".
3537 * So we encode all the register numbers into the descriptor.
3538 */
3539 desc = deposit32(a->rd, 5, 5, a->rn);
3540 desc = deposit32(desc, 10, 5, a->rm);
3541 desc = deposit32(desc, 15, 5, a->ra);
3542 desc = simd_desc(vsz, vsz, desc);
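/* The resulting layout is rd in bits [4:0], rn in [9:5], rm in
 * [14:10] and ra in [19:15] of the value that simd_desc places
 * into the data field.
 */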
3543
3544 t_desc = tcg_const_i32(desc);
3545 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3546 fn(cpu_env, pg, t_desc);
3547 tcg_temp_free_i32(t_desc);
3548 tcg_temp_free_ptr(pg);
3549 return true;
3550}
3551
3552#define DO_FMLA(NAME, name) \
3553static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3554{ \
3555 static gen_helper_sve_fmla * const fns[4] = { \
3556 NULL, gen_helper_sve_##name##_h, \
3557 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3558 }; \
3559 return do_fmla(s, a, fns[a->esz]); \
3560}
3561
3562DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3563DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3564DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3565DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3566
3567#undef DO_FMLA
3568
3569/*
3570 *** SVE Floating Point Unary Operations Predicated Group
3571 */
3572
3573static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3574 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3575{
3576 if (sve_access_check(s)) {
3577 unsigned vsz = vec_full_reg_size(s);
3578 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3579 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3580 vec_full_reg_offset(s, rn),
3581 pred_full_reg_offset(s, pg),
3582 status, vsz, vsz, 0, fn);
3583 tcg_temp_free_ptr(status);
3584 }
3585 return true;
3586}
3587
3588static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3589{
3590 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3591}
3592
3593static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3594{
3595 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3596}
3597
3598static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3599{
3600 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3601}
3602
3603static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3604{
3605 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3606}
3607
3608static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3609{
3610 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3611}
3612
3613static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3614{
3615 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3616}
3617
3618static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3619{
3620 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3621}
3622
3623static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3624{
3625 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3626}
3627
3628static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3629{
3630 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3631}
3632
3633static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3634{
3635 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3636}
3637
3638static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3639{
3640 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3641}
3642
3643static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3644{
3645 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3646}
3647
3648static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3649{
3650 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3651}
3652
3653static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3654{
3655 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3656}
3657
3658/*
3659 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3660 */
3661
3662/* Subroutine loading a vector register at VOFS of LEN bytes.
3663 * The load should begin at the address Rn + IMM.
3664 */
3665
3666static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3667 int rn, int imm)
3668{
3669 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3670 uint32_t len_remain = len % 8;
3671 uint32_t nparts = len / 8 + ctpop8(len_remain);
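/* The tail is 0, 2, 4 or 6 bytes; a 6-byte tail requires two
 * loads (word + halfword), which ctpop8 counts exactly.
 */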
3672 int midx = get_mem_index(s);
3673 TCGv_i64 addr, t0, t1;
3674
3675 addr = tcg_temp_new_i64();
3676 t0 = tcg_temp_new_i64();
3677
3678 /* Note that unpredicated load/store of vector/predicate registers
3679 * are defined as a stream of bytes, which equates to little-endian
3680 * operations on larger quantities. There is no nice way to force
3681 * a little-endian load for aarch64_be-linux-user out of line.
3682 *
3683 * Attempt to keep code expansion to a minimum by limiting the
3684 * amount of unrolling done.
3685 */
3686 if (nparts <= 4) {
3687 int i;
3688
3689 for (i = 0; i < len_align; i += 8) {
3690 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3691 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3692 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3693 }
3694 } else {
3695 TCGLabel *loop = gen_new_label();
3696 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3697
3698 gen_set_label(loop);
3699
3700 /* Minimize the number of local temps that must be re-read from
3701 * the stack each iteration. Instead, re-compute values other
3702 * than the loop counter.
3703 */
3704 tp = tcg_temp_new_ptr();
3705 tcg_gen_addi_ptr(tp, i, imm);
3706 tcg_gen_extu_ptr_i64(addr, tp);
3707 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3708
3709 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3710
3711 tcg_gen_add_ptr(tp, cpu_env, i);
3712 tcg_gen_addi_ptr(i, i, 8);
3713 tcg_gen_st_i64(t0, tp, vofs);
3714 tcg_temp_free_ptr(tp);
3715
3716 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3717 tcg_temp_free_ptr(i);
3718 }
3719
3720 /* Predicate register loads can be any multiple of 2.
3721 * Note that we still store the entire 64-bit unit into cpu_env.
3722 */
3723 if (len_remain) {
3724 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3725
3726 switch (len_remain) {
3727 case 2:
3728 case 4:
3729 case 8:
3730 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3731 break;
3732
3733 case 6:
3734 t1 = tcg_temp_new_i64();
3735 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3736 tcg_gen_addi_i64(addr, addr, 4);
3737 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3738 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3739 tcg_temp_free_i64(t1);
3740 break;
3741
3742 default:
3743 g_assert_not_reached();
3744 }
3745 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3746 }
3747 tcg_temp_free_i64(addr);
3748 tcg_temp_free_i64(t0);
3749}
3750
3751static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3752{
3753 if (sve_access_check(s)) {
3754 int size = vec_full_reg_size(s);
3755 int off = vec_full_reg_offset(s, a->rd);
3756 do_ldr(s, off, size, a->rn, a->imm * size);
3757 }
3758 return true;
3759}
3760
3761static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3762{
3763 if (sve_access_check(s)) {
3764 int size = pred_full_reg_size(s);
3765 int off = pred_full_reg_offset(s, a->rd);
3766 do_ldr(s, off, size, a->rn, a->imm * size);
3767 }
3768 return true;
3769}
3770
3771/*
3772 *** SVE Memory - Contiguous Load Group
3773 */
3774
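/* dtype is the 4-bit field that jointly encodes the memory element
 * size, the register element size, and sign extension: e.g. dtype 0
 * is LD1B into byte elements, dtype 4 is LD1SW into doublewords, and
 * dtype 15 is LD1D.
 */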
3775/* The memory mode of the dtype. */
3776static const TCGMemOp dtype_mop[16] = {
3777 MO_UB, MO_UB, MO_UB, MO_UB,
3778 MO_SL, MO_UW, MO_UW, MO_UW,
3779 MO_SW, MO_SW, MO_UL, MO_UL,
3780 MO_SB, MO_SB, MO_SB, MO_Q
3781};
3782
3783#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3784
3785/* The vector element size of dtype. */
3786static const uint8_t dtype_esz[16] = {
3787 0, 1, 2, 3,
3788 3, 1, 2, 3,
3789 3, 2, 2, 3,
3790 3, 2, 1, 3
3791};
3792
3793static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3794 gen_helper_gvec_mem *fn)
3795{
3796 unsigned vsz = vec_full_reg_size(s);
3797 TCGv_ptr t_pg;
3798 TCGv_i32 desc;
3799
3800 /* For e.g. LD4, there are not enough arguments to pass all 4
3801 * registers as pointers, so encode the regno into the data field.
3802 * For consistency, do this even for LD1.
3803 */
3804 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3805 t_pg = tcg_temp_new_ptr();
3806
3807 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3808 fn(cpu_env, t_pg, addr, desc);
3809
3810 tcg_temp_free_ptr(t_pg);
3811 tcg_temp_free_i32(desc);
3812}
3813
3814static void do_ld_zpa(DisasContext *s, int zt, int pg,
3815 TCGv_i64 addr, int dtype, int nreg)
3816{
3817 static gen_helper_gvec_mem * const fns[16][4] = {
3818 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3819 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3820 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3821 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3822 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3823
3824 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3825 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3826 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3827 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3828 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3829
3830 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3831 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3832 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3833 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3834 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3835
3836 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3837 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3838 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3839 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3840 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3841 };
3842 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3843
3844 /* While there are holes in the table, they are not
3845 * accessible via the instruction encoding.
3846 */
3847 assert(fn != NULL);
3848 do_mem_zpa(s, zt, pg, addr, fn);
3849}
3850
3851static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3852{
3853 if (a->rm == 31) {
3854 return false;
3855 }
3856 if (sve_access_check(s)) {
3857 TCGv_i64 addr = new_tmp_a64(s);
3858 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3859 (a->nreg + 1) << dtype_msz(a->dtype));
3860 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3861 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3862 }
3863 return true;
3864}
3865
3866static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3867{
3868 if (sve_access_check(s)) {
3869 int vsz = vec_full_reg_size(s);
3870 int elements = vsz >> dtype_esz[a->dtype];
3871 TCGv_i64 addr = new_tmp_a64(s);
3872
3873 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3874 (a->imm * elements * (a->nreg + 1))
3875 << dtype_msz(a->dtype));
3876 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3877 }
3878 return true;
3879}
3880
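/* First-fault (LDFF1) and non-fault (LDNF1) loads: the fault handling,
 * including the FFR update, is left to the helpers, so translation
 * differs from a normal contiguous load only in the address
 * computation.
 */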
3881static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3882{
3883 static gen_helper_gvec_mem * const fns[16] = {
3884 gen_helper_sve_ldff1bb_r,
3885 gen_helper_sve_ldff1bhu_r,
3886 gen_helper_sve_ldff1bsu_r,
3887 gen_helper_sve_ldff1bdu_r,
3888
3889 gen_helper_sve_ldff1sds_r,
3890 gen_helper_sve_ldff1hh_r,
3891 gen_helper_sve_ldff1hsu_r,
3892 gen_helper_sve_ldff1hdu_r,
3893
3894 gen_helper_sve_ldff1hds_r,
3895 gen_helper_sve_ldff1hss_r,
3896 gen_helper_sve_ldff1ss_r,
3897 gen_helper_sve_ldff1sdu_r,
3898
3899 gen_helper_sve_ldff1bds_r,
3900 gen_helper_sve_ldff1bss_r,
3901 gen_helper_sve_ldff1bhs_r,
3902 gen_helper_sve_ldff1dd_r,
3903 };
3904
3905 if (sve_access_check(s)) {
3906 TCGv_i64 addr = new_tmp_a64(s);
3907 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
3908 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3909 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3910 }
3911 return true;
3912}
3913
3914static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3915{
3916 static gen_helper_gvec_mem * const fns[16] = {
3917 gen_helper_sve_ldnf1bb_r,
3918 gen_helper_sve_ldnf1bhu_r,
3919 gen_helper_sve_ldnf1bsu_r,
3920 gen_helper_sve_ldnf1bdu_r,
3921
3922 gen_helper_sve_ldnf1sds_r,
3923 gen_helper_sve_ldnf1hh_r,
3924 gen_helper_sve_ldnf1hsu_r,
3925 gen_helper_sve_ldnf1hdu_r,
3926
3927 gen_helper_sve_ldnf1hds_r,
3928 gen_helper_sve_ldnf1hss_r,
3929 gen_helper_sve_ldnf1ss_r,
3930 gen_helper_sve_ldnf1sdu_r,
3931
3932 gen_helper_sve_ldnf1bds_r,
3933 gen_helper_sve_ldnf1bss_r,
3934 gen_helper_sve_ldnf1bhs_r,
3935 gen_helper_sve_ldnf1dd_r,
3936 };
3937
3938 if (sve_access_check(s)) {
3939 int vsz = vec_full_reg_size(s);
3940 int elements = vsz >> dtype_esz[a->dtype];
3941 int off = (a->imm * elements) << dtype_msz(a->dtype);
3942 TCGv_i64 addr = new_tmp_a64(s);
3943
3944 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
3945 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3946 }
3947 return true;
3948}
3949
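/* LD1RQ loads a single 16-byte quadword, predicated per element, and
 * replicates it to fill the whole vector; hence the fixed
 * simd_desc(16, 16, zt) below.
 */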
3950static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
3951{
3952 static gen_helper_gvec_mem * const fns[4] = {
3953 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
3954 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
3955 };
3956 unsigned vsz = vec_full_reg_size(s);
3957 TCGv_ptr t_pg;
3958 TCGv_i32 desc;
3959
3960 /* Load the first quadword using the normal predicated load helpers. */
3961 desc = tcg_const_i32(simd_desc(16, 16, zt));
3962 t_pg = tcg_temp_new_ptr();
3963
3964 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3965 fns[msz](cpu_env, t_pg, addr, desc);
3966
3967 tcg_temp_free_ptr(t_pg);
3968 tcg_temp_free_i32(desc);
3969
3970 /* Replicate that first quadword. */
3971 if (vsz > 16) {
3972 unsigned dofs = vec_full_reg_offset(s, zt);
3973 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
3974 }
3975}
3976
3977static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3978{
3979 if (a->rm == 31) {
3980 return false;
3981 }
3982 if (sve_access_check(s)) {
3983 int msz = dtype_msz(a->dtype);
3984 TCGv_i64 addr = new_tmp_a64(s);
3985 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
3986 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3987 do_ldrq(s, a->rd, a->pg, addr, msz);
3988 }
3989 return true;
3990}
3991
3992static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3993{
3994 if (sve_access_check(s)) {
3995 TCGv_i64 addr = new_tmp_a64(s);
3996 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
3997 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
3998 }
3999 return true;
4000}
4001
4002static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4003 int msz, int esz, int nreg)
4004{
4005 static gen_helper_gvec_mem * const fn_single[4][4] = {
4006 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4007 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4008 { NULL, gen_helper_sve_st1hh_r,
4009 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4010 { NULL, NULL,
4011 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4012 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4013 };
4014 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4015 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4016 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4017 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4018 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4019 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4020 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4021 };
4022 gen_helper_gvec_mem *fn;
4023
4024 if (nreg == 0) {
4025 /* ST1 */
4026 fn = fn_single[msz][esz];
4027 } else {
4028 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4029 assert(msz == esz);
4030 fn = fn_multiple[nreg - 1][msz];
4031 }
4032 assert(fn != NULL);
4033 do_mem_zpa(s, zt, pg, addr, fn);
4034}
4035
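/* Stores may truncate (msz < esz) but never extend, hence the
 * msz > esz rejections below; there are no signed store forms.
 */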
4036static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4037{
4038 if (a->rm == 31 || a->msz > a->esz) {
4039 return false;
4040 }
4041 if (sve_access_check(s)) {
4042 TCGv_i64 addr = new_tmp_a64(s);
4043 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4044 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4045 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4046 }
4047 return true;
4048}
4049
4050static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4051{
4052 if (a->msz > a->esz) {
4053 return false;
4054 }
4055 if (sve_access_check(s)) {
4056 int vsz = vec_full_reg_size(s);
4057 int elements = vsz >> a->esz;
4058 TCGv_i64 addr = new_tmp_a64(s);
4059
4060 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4061 (a->imm * elements * (a->nreg + 1)) << a->msz);
4062 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4063 }
4064 return true;
4065}