/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

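/*
 * Worked example (illustrative, not from the original source): the
 * tsz:imm3 field 0b0001'100 (x == 12) has its highest tsz bit at
 * position 0 once imm3 is discarded, so tszimm_esz gives MO_8.  For a
 * right shift the amount is 16 - 12 = 4; for a left shift it is
 * 12 - 8 = 4.  Right shifts thus range 1..esize and left shifts
 * 0..esize-1, as the architecture requires.
 */
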
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

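/*
 * Illustrative example (values assumed): x == 0x1ff has SH set and
 * imm8 == 0xff, i.e. -1 as a signed byte, so the result is
 * -1 << 8 == -256.  With SH clear, x == 0x07f yields simply 127.
 */
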
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

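/*
 * For example (illustrative): with a 256-bit vector length, sve_len is
 * 32 bytes, so each predicate register is 32 / 8 = 4 bytes -- one
 * predicate bit per vector byte.
 */
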
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

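/*
 * Illustrative values (assumed): a 4-byte predicate rounds up to 8,
 * a 12-byte predicate (VL = 768 bits) rounds up to 16, and a 32-byte
 * predicate (VL = 2048 bits) is already 16-aligned and stays 32.
 */
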
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

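/*
 * A note on the encoding (my reading, not from the original source):
 * given QEMU's flag representation -- N is bit 31 of cpu_NF, Z is set
 * iff cpu_ZF == 0, and C is the low bit of cpu_CF -- the helper packs
 * N into bit 31, "not Z" into bit 1, and C into bit 0 of its return
 * value, with V always clear.
 */
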
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

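/*
 * Illustrative reading: a predicate holds one bit per byte of the
 * vector, so for esz == MO_16 only every second bit can be active
 * (0x5555...), for MO_32 every fourth (0x1111...), and for MO_64
 * every eighth (0x0101...).
 */
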
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3 * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_zpz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

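/*
 * Illustrative note (my summary): each reduction helper folds the
 * active elements of Zn under Pg into a single 64-bit value, which
 * write_fp_dreg then places in the low 64 bits of the SIMD&FP
 * destination -- e.g. UADDV d0, p0, z1.b sums the predicated bytes
 * of Z1 into D0.
 */
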
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                  \
    static gen_helper_gvec_5 * const fns[4] = {                    \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,      \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,      \
    };                                                             \
    return do_zpzzz_ool(s, a, fns[a->esz]);                        \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

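/*
 * Illustrative semantics (my summary): element i of Zd becomes
 * start + i * incr, truncated to the element size.  So with a start
 * of 1 and an increment of 2, INDEX z0.s, #1, #2 yields
 * {1, 3, 5, 7, ...} across the whole vector.
 */
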
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

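/*
 * Worked example (values assumed): with a 256-bit vector length,
 * vec_full_reg_size is 32 and pred_full_reg_size is 4, so
 * ADDVL x0, x1, #2 adds 64 to X1, ADDPL x0, x1, #2 adds 8, and
 * RDVL x0, #1 writes 32 to X0.
 */
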
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

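/*
 * Illustrative reading (my summary of the variants, inferred from the
 * helper names): each element of Zd becomes Zn + (offset << imm), with
 * the offset taken from the corresponding element of Zm -- as-is for
 * the packed 32- and 64-bit forms (p32/p64), or as the sign-/zero-
 * extended low 32 bits of a 64-bit element for the s32/u32 forms.
 */
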
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

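/*
 * Worked example (values assumed): a 256-bit vector with esz == MO_32
 * holds 8 elements.  POW2 gives 8; VL5 gives 5; VL16 gives 0 because
 * only 8 elements are available; MUL3 gives 6; ALL gives 8.
 */
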
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

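/*
 * Worked example (values assumed): for PTRUE p0.h, VL5 with a 256-bit
 * vector, numelem is 5, so setsz is 10 predicate bits.  word is the
 * halfword mask 0x5555...5555 and lastword keeps only its low 10 bits
 * (0x155); the remaining predicate bits are written as zero.
 */
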
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

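/*
 * Worked example (values assumed): for an unsigned increment with
 * reg == 0xfffffff0 and val == 0x20, the widened sum 0x1'0000'0010
 * exceeds UINT32_MAX, so the movcond clamps the result to 0xffffffff.
 */
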
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

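/*
 * A note on the signed cases (my reading): reg ^ val has its top bit
 * set when the operands' signs differ, and result ^ operand has its
 * top bit set when the result's sign differs.  Combining the two
 * isolates "signed overflow" in bit 63, which the movcond then tests
 * to select the saturated bound.  Since val is known positive, only
 * one bound is possible in each case.
 */
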
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

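/*
 * Illustrative example (values assumed): with a 256-bit vector,
 * CNTH x0, ALL, MUL #2 counts 16 halfword elements and multiplies by
 * the immediate, writing 32 to X0.
 */
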
static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

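/*
 * A note on the immediate (my summary): logic_imm_decode_wmask is the
 * standard A64 bitmask-immediate decoder, turning the N, immr and imms
 * fields into a repeating pattern such as 0x00ff00ff00ff00ff, and
 * rejecting encodings (like all-ones) that the form cannot express.
 */
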
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

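/*
 * Illustrative semantics (my summary): EXT treats Zm:Zn as one
 * double-length byte vector and extracts vsz bytes starting at the
 * immediate offset.  With a 16-byte vector and imm == 3, the result
 * is bytes 3..15 of Zn followed by bytes 0..2 of Zm, which is exactly
 * the pair of moves in the fast path above.
 */
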
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

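/*
 * Worked example of the combined immediate (values assumed): the
 * lowest set bit selects the element size and the bits above it give
 * the index, so imm == 0b10100 yields esz == 2 (words, from ctz32)
 * and index == 0b10100 >> 3 == 2, i.e. broadcast word element 2 of
 * Zn.  Out-of-range indices produce zero.
 */
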
1996static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
1997{
1998 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1999 static gen_insr * const fns[4] = {
2000 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2001 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2002 };
2003 unsigned vsz = vec_full_reg_size(s);
2004 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2005 TCGv_ptr t_zd = tcg_temp_new_ptr();
2006 TCGv_ptr t_zn = tcg_temp_new_ptr();
2007
2008 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2009 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2010
2011 fns[a->esz](t_zd, t_zn, val, desc);
2012
2013 tcg_temp_free_ptr(t_zd);
2014 tcg_temp_free_ptr(t_zn);
2015 tcg_temp_free_i32(desc);
2016}
2017
2018static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2019{
2020 if (sve_access_check(s)) {
2021 TCGv_i64 t = tcg_temp_new_i64();
2022 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2023 do_insr_i64(s, a, t);
2024 tcg_temp_free_i64(t);
2025 }
2026 return true;
2027}
2028
2029static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2030{
2031 if (sve_access_check(s)) {
2032 do_insr_i64(s, a, cpu_reg(s, a->rm));
2033 }
2034 return true;
2035}
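
/* Sketch of the INSR semantics behind the fns[] helpers (byte elements;
 * not the actual helper code): every element moves up one slot and the
 * scalar enters at element 0.  memmove makes the in-place case zd == zn
 * safe.
 */
static inline void ref_insr_b(uint8_t *zd, const uint8_t *zn,
                              uint8_t val, unsigned vsz)
{
    memmove(zd + 1, zn, vsz - 1);   /* elements 0..vsz-2 shift up by one */
    zd[0] = val;                    /* the scalar lands at the bottom */
}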
2036
2037static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2038{
2039 static gen_helper_gvec_2 * const fns[4] = {
2040 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2041 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2042 };
2043
2044 if (sve_access_check(s)) {
2045 unsigned vsz = vec_full_reg_size(s);
2046 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2047 vec_full_reg_offset(s, a->rn),
2048 vsz, vsz, 0, fns[a->esz]);
2049 }
2050 return true;
2051}
2052
2053static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2054{
2055 static gen_helper_gvec_3 * const fns[4] = {
2056 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2057 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2058 };
2059
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vec_full_reg_offset(s, a->rm),
2065 vsz, vsz, 0, fns[a->esz]);
2066 }
2067 return true;
2068}
2069
2070static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2071{
2072 static gen_helper_gvec_2 * const fns[4][2] = {
2073 { NULL, NULL },
2074 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2075 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2076 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2077 };
2078
2079 if (a->esz == 0) {
2080 return false;
2081 }
2082 if (sve_access_check(s)) {
2083 unsigned vsz = vec_full_reg_size(s);
2084 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2085 vec_full_reg_offset(s, a->rn)
2086 + (a->h ? vsz / 2 : 0),
2087 vsz, vsz, 0, fns[a->esz][a->u]);
2088 }
2089 return true;
2090}
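
/* Sketch of what the sunpk/uunpk helpers compute (H <- B shown; not the
 * QEMU code, and assuming d and n do not overlap): each element of the
 * selected half of Zn widens to twice its size.  The a->h ? vsz / 2 : 0
 * offset above is what picks that half.
 */
static inline void ref_sunpk_h(int16_t *d, const int8_t *n, unsigned vsz)
{
    /* vsz output bytes hold vsz / 2 halfword results. */
    for (unsigned i = 0; i < vsz / 2; ++i) {
        d[i] = n[i];                /* sign-extend each byte to 16 bits */
    }
}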
2091
2092/*
2093 *** SVE Permute - Predicates Group
2094 */
2095
2096static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2097 gen_helper_gvec_3 *fn)
2098{
2099 if (!sve_access_check(s)) {
2100 return true;
2101 }
2102
2103 unsigned vsz = pred_full_reg_size(s);
2104
2105 /* Predicate sizes may be smaller and cannot use simd_desc.
2106 We cannot round up, as we do elsewhere, because we need
2107 the exact size for ZIP2 and REV. We retain the style for
2108 the other helpers for consistency. */
2109 TCGv_ptr t_d = tcg_temp_new_ptr();
2110 TCGv_ptr t_n = tcg_temp_new_ptr();
2111 TCGv_ptr t_m = tcg_temp_new_ptr();
2112 TCGv_i32 t_desc;
2113 int desc;
2114
2115 desc = vsz - 2;
2116 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2117 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2118
2119 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2120 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2121 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2122 t_desc = tcg_const_i32(desc);
2123
2124 fn(t_d, t_n, t_m, t_desc);
2125
2126 tcg_temp_free_ptr(t_d);
2127 tcg_temp_free_ptr(t_n);
2128 tcg_temp_free_ptr(t_m);
2129 tcg_temp_free_i32(t_desc);
2130 return true;
2131}
2132
2133static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2134 gen_helper_gvec_2 *fn)
2135{
2136 if (!sve_access_check(s)) {
2137 return true;
2138 }
2139
2140 unsigned vsz = pred_full_reg_size(s);
2141 TCGv_ptr t_d = tcg_temp_new_ptr();
2142 TCGv_ptr t_n = tcg_temp_new_ptr();
2143 TCGv_i32 t_desc;
2144 int desc;
2145
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2148
2149 /* Predicate sizes may be smaller and cannot use simd_desc.
2150 We cannot round up, as we do elsewhere, because we need
2151 the exact size for ZIP2 and REV. We retain the style for
2152 the other helpers for consistency. */
2153
2154 desc = vsz - 2;
2155 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2156 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2157 t_desc = tcg_const_i32(desc);
2158
2159 fn(t_d, t_n, t_desc);
2160
2161 tcg_temp_free_i32(t_desc);
2162 tcg_temp_free_ptr(t_d);
2163 tcg_temp_free_ptr(t_n);
2164 return true;
2165}
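
/* Worked example of the hand-rolled descriptor used by both routines
 * above: at the maximum 2048-bit vector length the predicate is 32
 * bytes, so the low bits carry 32 - 2 = 30, with esz and high_odd in
 * the two 2-bit fields at SIMD_DATA_SHIFT; the helper presumably adds
 * the 2 back to recover the exact size.  simd_desc() is unusable here
 * because it encodes operation sizes only in 8-byte granules.
 */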
2166
2167static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2168{
2169 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2170}
2171
2172static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2173{
2174 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2175}
2176
2177static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2178{
2179 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2180}
2181
2182static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2183{
2184 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2185}
2186
2187static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2188{
2189 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2190}
2191
2192static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2193{
2194 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2195}
2196
2197static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2198{
2199 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2200}
2201
2202static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2203{
2204 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2205}
2206
2207static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2208{
2209 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2210}
2211
2212/*
2213 *** SVE Permute - Interleaving Group
2214 */
2215
2216static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2217{
2218 static gen_helper_gvec_3 * const fns[4] = {
2219 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2220 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2221 };
2222
2223 if (sve_access_check(s)) {
2224 unsigned vsz = vec_full_reg_size(s);
2225 unsigned high_ofs = high ? vsz / 2 : 0;
2226 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2227 vec_full_reg_offset(s, a->rn) + high_ofs,
2228 vec_full_reg_offset(s, a->rm) + high_ofs,
2229 vsz, vsz, 0, fns[a->esz]);
2230 }
2231 return true;
2232}
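
/* Reference sketch of the zip helpers (byte elements; assumes d does not
 * overlap n or m): ZIP1 interleaves the low halves of the inputs, and
 * the high_ofs adjustment above reuses the same helper for ZIP2 by
 * feeding it the high halves instead.
 */
static inline void ref_zip_b(uint8_t *d, const uint8_t *n,
                             const uint8_t *m, unsigned vsz)
{
    for (unsigned i = 0; i < vsz / 2; ++i) {
        d[2 * i] = n[i];            /* even slots from Zn */
        d[2 * i + 1] = m[i];        /* odd slots from Zm */
    }
}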
2233
2234static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2235 gen_helper_gvec_3 *fn)
2236{
2237 if (sve_access_check(s)) {
2238 unsigned vsz = vec_full_reg_size(s);
2239 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2240 vec_full_reg_offset(s, a->rn),
2241 vec_full_reg_offset(s, a->rm),
2242 vsz, vsz, data, fn);
2243 }
2244 return true;
2245}
2246
2247static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2248{
2249 return do_zip(s, a, false);
2250}
2251
2252static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2253{
2254 return do_zip(s, a, true);
2255}
2256
2257static gen_helper_gvec_3 * const uzp_fns[4] = {
2258 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2259 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2260};
2261
2262static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2263{
2264 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2265}
2266
2267static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2268{
2269 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2270}
2271
2272static gen_helper_gvec_3 * const trn_fns[4] = {
2273 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2274 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2275};
2276
2277static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2278{
2279 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2280}
2281
2282static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2283{
2284 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2285}
2286
2287/*
2288 *** SVE Permute Vector - Predicated Group
2289 */
2290
2291static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2292{
2293 static gen_helper_gvec_3 * const fns[4] = {
2294 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2295 };
2296 return do_zpz_ool(s, a, fns[a->esz]);
2297}
2298
2299/* Call the helper that computes the ARM LastActiveElement pseudocode
2300 * function, scaled by the element size. This includes the not found
2301 * indication; e.g. not found for esz=3 is -8.
2302 */
2303static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2304{
2305 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2306 * round up, as we do elsewhere, because we need the exact size.
2307 */
2308 TCGv_ptr t_p = tcg_temp_new_ptr();
2309 TCGv_i32 t_desc;
2310 unsigned vsz = pred_full_reg_size(s);
2311 unsigned desc;
2312
2313 desc = vsz - 2;
2314 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2315
2316 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2317 t_desc = tcg_const_i32(desc);
2318
2319 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2320
2321 tcg_temp_free_i32(t_desc);
2322 tcg_temp_free_ptr(t_p);
2323}
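
/* Reference for the helper's contract (a sketch, not the QEMU code):
 * return the byte offset of the last active element, i.e. the highest
 * predicate bit set at an element-size-aligned position, or -(1 << esz)
 * when nothing is active, matching the -8-for-esz=3 example above.
 */
static inline int ref_last_active_element(const uint8_t *pg,
                                          unsigned pred_sz, int esz)
{
    for (int i = (pred_sz * 8 >> esz) - 1; i >= 0; --i) {
        unsigned bit = i << esz;        /* one predicate bit per byte */
        if ((pg[bit / 8] >> (bit % 8)) & 1) {
            return bit;
        }
    }
    return -(1 << esz);
}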
2324
2325/* Increment LAST to the offset of the next element in the vector,
2326 * wrapping around to 0.
2327 */
2328static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2329{
2330 unsigned vsz = vec_full_reg_size(s);
2331
2332 tcg_gen_addi_i32(last, last, 1 << esz);
2333 if (is_power_of_2(vsz)) {
2334 tcg_gen_andi_i32(last, last, vsz - 1);
2335 } else {
2336 TCGv_i32 max = tcg_const_i32(vsz);
2337 TCGv_i32 zero = tcg_const_i32(0);
2338 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2339 tcg_temp_free_i32(max);
2340 tcg_temp_free_i32(zero);
2341 }
2342}
2343
2344/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2345static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2346{
2347 unsigned vsz = vec_full_reg_size(s);
2348
2349 if (is_power_of_2(vsz)) {
2350 tcg_gen_andi_i32(last, last, vsz - 1);
2351 } else {
2352 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2353 TCGv_i32 zero = tcg_const_i32(0);
2354 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2355 tcg_temp_free_i32(max);
2356 tcg_temp_free_i32(zero);
2357 }
2358}
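
/* Worked example for the two helpers above with a 384-bit vector
 * (vsz = 48, not a power of 2) and esz = 2: incr_last_active advances
 * 44 to 48, and the GEU movcond folds the out-of-range 48 back to 0;
 * wrap_last_active maps a not-found -4 to 48 - 4 = 44, the offset of
 * the last 32-bit element.
 */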
2359
2360/* Load an unsigned element of ESZ from BASE+OFS. */
2361static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2362{
2363 TCGv_i64 r = tcg_temp_new_i64();
2364
2365 switch (esz) {
2366 case 0:
2367 tcg_gen_ld8u_i64(r, base, ofs);
2368 break;
2369 case 1:
2370 tcg_gen_ld16u_i64(r, base, ofs);
2371 break;
2372 case 2:
2373 tcg_gen_ld32u_i64(r, base, ofs);
2374 break;
2375 case 3:
2376 tcg_gen_ld_i64(r, base, ofs);
2377 break;
2378 default:
2379 g_assert_not_reached();
2380 }
2381 return r;
2382}
2383
2384/* Load an unsigned element of ESZ from RM[LAST]. */
2385static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2386 int rm, int esz)
2387{
2388 TCGv_ptr p = tcg_temp_new_ptr();
2389 TCGv_i64 r;
2390
 2391 /* Convert the offset within the vector into an offset within ENV.
 2392 * The final adjustment for the vector register base
 2393 * is added as a constant offset to the load.
 2394 */
2395#ifdef HOST_WORDS_BIGENDIAN
2396 /* Adjust for element ordering. See vec_reg_offset. */
2397 if (esz < 3) {
2398 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2399 }
2400#endif
2401 tcg_gen_ext_i32_ptr(p, last);
2402 tcg_gen_add_ptr(p, p, cpu_env);
2403
2404 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2405 tcg_temp_free_ptr(p);
2406
2407 return r;
2408}
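
/* The big-endian XOR above mirrors vec_reg_offset: on such hosts each
 * 64-bit unit holds element 0 at its most significant end, so a byte
 * element (esz = 0) at offset i actually lives at i ^ 7 within its
 * uint64_t, while 64-bit elements (esz = 3) need no adjustment.
 */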
2409
2410/* Compute CLAST for a Zreg. */
2411static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2412{
2413 TCGv_i32 last;
2414 TCGLabel *over;
2415 TCGv_i64 ele;
2416 unsigned vsz, esz = a->esz;
2417
2418 if (!sve_access_check(s)) {
2419 return true;
2420 }
2421
2422 last = tcg_temp_local_new_i32();
2423 over = gen_new_label();
2424
2425 find_last_active(s, last, esz, a->pg);
2426
2427 /* There is of course no movcond for a 2048-bit vector,
2428 * so we must branch over the actual store.
2429 */
2430 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2431
2432 if (!before) {
2433 incr_last_active(s, last, esz);
2434 }
2435
2436 ele = load_last_active(s, last, a->rm, esz);
2437 tcg_temp_free_i32(last);
2438
2439 vsz = vec_full_reg_size(s);
2440 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2441 tcg_temp_free_i64(ele);
2442
2443 /* If this insn used MOVPRFX, we may need a second move. */
2444 if (a->rd != a->rn) {
2445 TCGLabel *done = gen_new_label();
2446 tcg_gen_br(done);
2447
2448 gen_set_label(over);
2449 do_mov_z(s, a->rd, a->rn);
2450
2451 gen_set_label(done);
2452 } else {
2453 gen_set_label(over);
2454 }
2455 return true;
2456}
2457
2458static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2459{
2460 return do_clast_vector(s, a, false);
2461}
2462
2463static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2464{
2465 return do_clast_vector(s, a, true);
2466}
2467
2468/* Compute CLAST for a scalar. */
2469static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2470 bool before, TCGv_i64 reg_val)
2471{
2472 TCGv_i32 last = tcg_temp_new_i32();
2473 TCGv_i64 ele, cmp, zero;
2474
2475 find_last_active(s, last, esz, pg);
2476
2477 /* Extend the original value of last prior to incrementing. */
2478 cmp = tcg_temp_new_i64();
2479 tcg_gen_ext_i32_i64(cmp, last);
2480
2481 if (!before) {
2482 incr_last_active(s, last, esz);
2483 }
2484
2485 /* The conceit here is that while last < 0 indicates not found, after
2486 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2487 * from which we can load garbage. We then discard the garbage with
2488 * a conditional move.
2489 */
2490 ele = load_last_active(s, last, rm, esz);
2491 tcg_temp_free_i32(last);
2492
2493 zero = tcg_const_i64(0);
2494 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2495
2496 tcg_temp_free_i64(zero);
2497 tcg_temp_free_i64(cmp);
2498 tcg_temp_free_i64(ele);
2499}
2500
2501/* Compute CLAST for a Vreg. */
2502static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2503{
2504 if (sve_access_check(s)) {
2505 int esz = a->esz;
2506 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2507 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2508
2509 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2510 write_fp_dreg(s, a->rd, reg);
2511 tcg_temp_free_i64(reg);
2512 }
2513 return true;
2514}
2515
2516static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2517{
2518 return do_clast_fp(s, a, false);
2519}
2520
2521static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2522{
2523 return do_clast_fp(s, a, true);
2524}
2525
2526/* Compute CLAST for a Xreg. */
2527static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2528{
2529 TCGv_i64 reg;
2530
2531 if (!sve_access_check(s)) {
2532 return true;
2533 }
2534
2535 reg = cpu_reg(s, a->rd);
2536 switch (a->esz) {
2537 case 0:
2538 tcg_gen_ext8u_i64(reg, reg);
2539 break;
2540 case 1:
2541 tcg_gen_ext16u_i64(reg, reg);
2542 break;
2543 case 2:
2544 tcg_gen_ext32u_i64(reg, reg);
2545 break;
2546 case 3:
2547 break;
2548 default:
2549 g_assert_not_reached();
2550 }
2551
2552 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2553 return true;
2554}
2555
2556static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2557{
2558 return do_clast_general(s, a, false);
2559}
2560
2561static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2562{
2563 return do_clast_general(s, a, true);
2564}
2565
2566/* Compute LAST for a scalar. */
2567static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2568 int pg, int rm, bool before)
2569{
2570 TCGv_i32 last = tcg_temp_new_i32();
2571 TCGv_i64 ret;
2572
2573 find_last_active(s, last, esz, pg);
2574 if (before) {
2575 wrap_last_active(s, last, esz);
2576 } else {
2577 incr_last_active(s, last, esz);
2578 }
2579
2580 ret = load_last_active(s, last, rm, esz);
2581 tcg_temp_free_i32(last);
2582 return ret;
2583}
2584
2585/* Compute LAST for a Vreg. */
2586static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2587{
2588 if (sve_access_check(s)) {
2589 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2590 write_fp_dreg(s, a->rd, val);
2591 tcg_temp_free_i64(val);
2592 }
2593 return true;
2594}
2595
2596static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2597{
2598 return do_last_fp(s, a, false);
2599}
2600
2601static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2602{
2603 return do_last_fp(s, a, true);
2604}
2605
2606/* Compute LAST for a Xreg. */
2607static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2608{
2609 if (sve_access_check(s)) {
2610 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2611 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2612 tcg_temp_free_i64(val);
2613 }
2614 return true;
2615}
2616
2617static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618{
2619 return do_last_general(s, a, false);
2620}
2621
2622static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2623{
2624 return do_last_general(s, a, true);
2625}
2626
2627static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2628{
2629 if (sve_access_check(s)) {
2630 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2631 }
2632 return true;
2633}
2634
2635static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2636{
2637 if (sve_access_check(s)) {
2638 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2639 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2640 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2641 tcg_temp_free_i64(t);
2642 }
2643 return true;
2644}
2645
2646static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2647{
2648 static gen_helper_gvec_3 * const fns[4] = {
2649 NULL,
2650 gen_helper_sve_revb_h,
2651 gen_helper_sve_revb_s,
2652 gen_helper_sve_revb_d,
2653 };
2654 return do_zpz_ool(s, a, fns[a->esz]);
2655}
2656
2657static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2658{
2659 static gen_helper_gvec_3 * const fns[4] = {
2660 NULL,
2661 NULL,
2662 gen_helper_sve_revh_s,
2663 gen_helper_sve_revh_d,
2664 };
2665 return do_zpz_ool(s, a, fns[a->esz]);
2666}
2667
2668static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669{
2670 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2671}
2672
2673static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674{
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 gen_helper_sve_rbit_b,
2677 gen_helper_sve_rbit_h,
2678 gen_helper_sve_rbit_s,
2679 gen_helper_sve_rbit_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682}
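
/* Scalar sketch of "reverse within elements" (not the QEMU helper):
 * revb_h byte-swaps each 16-bit lane of a 64-bit unit with the usual
 * mask-and-shift trick; the wider variants follow the same pattern.
 */
static inline uint64_t ref_revb_h(uint64_t n)
{
    uint64_t mask = 0x00ff00ff00ff00ffull;

    return ((n & mask) << 8) | ((n >> 8) & mask);
}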
2683
2684/*
2685 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
2686 */
2687
2688/* Subroutine loading a vector register at VOFS of LEN bytes.
2689 * The load should begin at the address Rn + IMM.
2690 */
2691
2692static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
2693 int rn, int imm)
2694{
2695 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
2696 uint32_t len_remain = len % 8;
2697 uint32_t nparts = len / 8 + ctpop8(len_remain);
2698 int midx = get_mem_index(s);
2699 TCGv_i64 addr, t0, t1;
2700
2701 addr = tcg_temp_new_i64();
2702 t0 = tcg_temp_new_i64();
2703
2704 /* Note that unpredicated load/store of vector/predicate registers
2705 * are defined as a stream of bytes, which equates to little-endian
2706 * operations on larger quantities. There is no nice way to force
2707 * a little-endian load for aarch64_be-linux-user out of line.
2708 *
2709 * Attempt to keep code expansion to a minimum by limiting the
2710 * amount of unrolling done.
2711 */
2712 if (nparts <= 4) {
2713 int i;
2714
2715 for (i = 0; i < len_align; i += 8) {
2716 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
2717 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
2718 tcg_gen_st_i64(t0, cpu_env, vofs + i);
2719 }
2720 } else {
2721 TCGLabel *loop = gen_new_label();
2722 TCGv_ptr tp, i = tcg_const_local_ptr(0);
2723
2724 gen_set_label(loop);
2725
2726 /* Minimize the number of local temps that must be re-read from
2727 * the stack each iteration. Instead, re-compute values other
2728 * than the loop counter.
2729 */
2730 tp = tcg_temp_new_ptr();
2731 tcg_gen_addi_ptr(tp, i, imm);
2732 tcg_gen_extu_ptr_i64(addr, tp);
2733 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
2734
2735 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
2736
2737 tcg_gen_add_ptr(tp, cpu_env, i);
2738 tcg_gen_addi_ptr(i, i, 8);
2739 tcg_gen_st_i64(t0, tp, vofs);
2740 tcg_temp_free_ptr(tp);
2741
2742 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
2743 tcg_temp_free_ptr(i);
2744 }
2745
 2746 /* Predicate register loads can be any multiple of 2 bytes.
 2747 * Note that we still store the entire 64-bit unit into cpu_env.
 2748 */
2749 if (len_remain) {
2750 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
2751
2752 switch (len_remain) {
2753 case 2:
2754 case 4:
2755 case 8:
2756 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
2757 break;
2758
2759 case 6:
2760 t1 = tcg_temp_new_i64();
2761 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
2762 tcg_gen_addi_i64(addr, addr, 4);
2763 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
2764 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
2765 tcg_temp_free_i64(t1);
2766 break;
2767
2768 default:
2769 g_assert_not_reached();
2770 }
2771 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
2772 }
2773 tcg_temp_free_i64(addr);
2774 tcg_temp_free_i64(t0);
2775}
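
/* Worked example of the tail handling above: a predicate load of
 * len = 22 bytes (a 1408-bit vector) gives len_align = 16 and
 * len_remain = 6, with nparts = 22 / 8 + ctpop8(6) = 4, so the
 * unrolled path is used.  A 6-byte tail cannot be a single load,
 * hence the 4-byte + 2-byte split recombined with deposit; for the
 * 2/4/8 cases, MO_LE | ctz32(len_remain) yields MO_LEUW, MO_LEUL
 * and MO_LEQ respectively.
 */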
2776
2777static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
2778{
2779 if (sve_access_check(s)) {
2780 int size = vec_full_reg_size(s);
2781 int off = vec_full_reg_offset(s, a->rd);
2782 do_ldr(s, off, size, a->rn, a->imm * size);
2783 }
2784 return true;
2785}
2786
2787static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
2788{
2789 if (sve_access_check(s)) {
2790 int size = pred_full_reg_size(s);
2791 int off = pred_full_reg_offset(s, a->rd);
2792 do_ldr(s, off, size, a->rn, a->imm * size);
2793 }
2794 return true;
2795}