/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
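
/* Worked example, derived from the helpers above: x = 0b0001101 has
 * imm3 = 0b101 and tsz = 0b0001, so tszimm_esz(x) = 31 - clz32(1) = 0
 * (byte elements).  Then tszimm_shr(x) = 16 - 13 = 3 and
 * tszimm_shl(x) = 13 - 8 = 5.  A tsz of zero yields esz = -1,
 * flagging the unallocated encoding.
 */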

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
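
/* Example: an immediate field of 0x1ab has SH set, so both expanders
 * shift the low byte up: expand_imm_sh8u(0x1ab) = 0xab00, while
 * expand_imm_sh8s(0x1ab) = (int8_t)0xab << 8 = -21760 (0xffffab00).
 */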

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
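
/* These are the diagonal entries of the dtype table, i.e. the
 * encodings for which the element size equals the memory size,
 * giving the unsigned LD1B/LD1H/LD1W/LD1D forms.
 */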

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
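
/* Note the packing used above: the helper result has N in bit 31
 * (QEMU keeps N in the sign bit of cpu_NF), bit 1 clear when the
 * Z flag should be set (cpu_ZF is "zero means Z set"), and C in
 * bit 0.  V is always zero for PredTest.
 */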

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
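
/* Note that t serves double duty above: it carries the word count in
 * as the descriptor and carries the packed flags result back out.
 */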

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_3 * const fns[4] = {                          \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_zpz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                          \
    static gen_helper_gvec_5 * const fns[4] = {                            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,              \
    };                                                                     \
    return do_zpzzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
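
/* Example: with a 256-bit vector (fullsz = 32 bytes) and esz = MO_32
 * there are 8 elements, so POW2 and ALL give 8, VL7 gives 7, MUL3
 * gives 6, and VL16 gives 0 because the bound exceeds 8.
 */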

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
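
/* Example: a saturating unsigned decrement of reg = 5 by val = 9
 * computes 5 - 9 = -4 in 64-bit arithmetic, which is below the
 * unsigned bound of 0, so the movcond clamps the result to 0.
 */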

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
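            /* The standard identity: for t1 = reg - val, overflow
             * occurred iff (reg ^ val) & (reg ^ t1) has its sign bit
             * set.  Since val is known positive, only negative
             * overflow is possible, hence the single INT64_MIN bound.
             */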
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
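            /* Dually: with val positive, the sum overflows iff
             * (sum ^ val) & ~(reg ^ val) is negative, i.e. the operands
             * agree in sign but the sum does not; bound with INT64_MAX.
             */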
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
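
/* E.g. with vsz = 16 and imm = 3 this yields Zn bytes 3..15 followed
 * by Zm bytes 0..2, matching the EXT concatenate-and-extract semantics.
 */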
2002
30562ab7
RH
2003/*
2004 *** SVE Permute - Unpredicated Group
2005 */
2006
2007static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2008{
2009 if (sve_access_check(s)) {
2010 unsigned vsz = vec_full_reg_size(s);
2011 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2012 vsz, vsz, cpu_reg_sp(s, a->rn));
2013 }
2014 return true;
2015}
2016
2017static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2018{
2019 if ((a->imm & 0x1f) == 0) {
2020 return false;
2021 }
2022 if (sve_access_check(s)) {
2023 unsigned vsz = vec_full_reg_size(s);
2024 unsigned dofs = vec_full_reg_offset(s, a->rd);
2025 unsigned esz, index;
2026
2027 esz = ctz32(a->imm);
2028 index = a->imm >> (esz + 1);
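/* Illustrative decode (assumed from the decode pattern, which is not
 * shown here): the immediate is {imm2, tsz}; the lowest set bit of tsz
 * selects the element size and the bits above it form the index.
 * E.g. imm == 0b0001010 gives esz = 1 and index = 2, i.e.
 * DUP Zd.H, Zn.H[2].
 */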
2029
2030 if ((index << esz) < vsz) {
2031 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2032 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2033 } else {
2034 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2035 }
2036 }
2037 return true;
2038}
2039
2040static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2041{
2042 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2043 static gen_insr * const fns[4] = {
2044 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2045 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2046 };
2047 unsigned vsz = vec_full_reg_size(s);
2048 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2049 TCGv_ptr t_zd = tcg_temp_new_ptr();
2050 TCGv_ptr t_zn = tcg_temp_new_ptr();
2051
2052 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2053 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2054
2055 fns[a->esz](t_zd, t_zn, val, desc);
2056
2057 tcg_temp_free_ptr(t_zd);
2058 tcg_temp_free_ptr(t_zn);
2059 tcg_temp_free_i32(desc);
2060}
2061
2062static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2063{
2064 if (sve_access_check(s)) {
2065 TCGv_i64 t = tcg_temp_new_i64();
2066 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2067 do_insr_i64(s, a, t);
2068 tcg_temp_free_i64(t);
2069 }
2070 return true;
2071}
2072
2073static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2074{
2075 if (sve_access_check(s)) {
2076 do_insr_i64(s, a, cpu_reg(s, a->rm));
2077 }
2078 return true;
2079}
2080
2081static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2082{
2083 static gen_helper_gvec_2 * const fns[4] = {
2084 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2085 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2086 };
2087
2088 if (sve_access_check(s)) {
2089 unsigned vsz = vec_full_reg_size(s);
2090 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2091 vec_full_reg_offset(s, a->rn),
2092 vsz, vsz, 0, fns[a->esz]);
2093 }
2094 return true;
2095}
2096
2097static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2098{
2099 static gen_helper_gvec_3 * const fns[4] = {
2100 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2101 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2102 };
2103
2104 if (sve_access_check(s)) {
2105 unsigned vsz = vec_full_reg_size(s);
2106 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2107 vec_full_reg_offset(s, a->rn),
2108 vec_full_reg_offset(s, a->rm),
2109 vsz, vsz, 0, fns[a->esz]);
2110 }
2111 return true;
2112}
2113
2114static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2115{
2116 static gen_helper_gvec_2 * const fns[4][2] = {
2117 { NULL, NULL },
2118 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2119 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2120 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2121 };
2122
2123 if (a->esz == 0) {
2124 return false;
2125 }
2126 if (sve_access_check(s)) {
2127 unsigned vsz = vec_full_reg_size(s);
2128 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2129 vec_full_reg_offset(s, a->rn)
2130 + (a->h ? vsz / 2 : 0),
2131 vsz, vsz, 0, fns[a->esz][a->u]);
2132 }
2133 return true;
2134}
2135
2136/*
2137 *** SVE Permute - Predicates Group
2138 */
2139
2140static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2141 gen_helper_gvec_3 *fn)
2142{
2143 if (!sve_access_check(s)) {
2144 return true;
2145 }
2146
2147 unsigned vsz = pred_full_reg_size(s);
2148
 2149 /* Predicate sizes may be smaller than simd_desc can encode, so we
 2150 cannot use simd_desc here.  We cannot round up, as we do elsewhere,
 2151 because we need the exact size for ZIP2 and REV.  We retain the style
 2152 of the other helpers for consistency.  */
2153 TCGv_ptr t_d = tcg_temp_new_ptr();
2154 TCGv_ptr t_n = tcg_temp_new_ptr();
2155 TCGv_ptr t_m = tcg_temp_new_ptr();
2156 TCGv_i32 t_desc;
2157 int desc;
2158
2159 desc = vsz - 2;
2160 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2161 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
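/* Sketch of this hand-rolled descriptor (mirroring, by assumption, the
 * simd_desc layout): the low bits hold the exact predicate size as
 * vsz - 2, and the data field holds esz plus the high/odd selector for
 * the helper to extract.
 */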
2162
2163 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2164 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2165 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2166 t_desc = tcg_const_i32(desc);
2167
2168 fn(t_d, t_n, t_m, t_desc);
2169
2170 tcg_temp_free_ptr(t_d);
2171 tcg_temp_free_ptr(t_n);
2172 tcg_temp_free_ptr(t_m);
2173 tcg_temp_free_i32(t_desc);
2174 return true;
2175}
2176
2177static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2178 gen_helper_gvec_2 *fn)
2179{
2180 if (!sve_access_check(s)) {
2181 return true;
2182 }
2183
2184 unsigned vsz = pred_full_reg_size(s);
2185 TCGv_ptr t_d = tcg_temp_new_ptr();
2186 TCGv_ptr t_n = tcg_temp_new_ptr();
2187 TCGv_i32 t_desc;
2188 int desc;
2189
2190 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2191 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2192
 2193 /* Predicate sizes may be smaller than simd_desc can encode, so we
 2194 cannot use simd_desc here.  We cannot round up, as we do elsewhere,
 2195 because we need the exact size for ZIP2 and REV.  We retain the style
 2196 of the other helpers for consistency.  */
2197
2198 desc = vsz - 2;
2199 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2200 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2201 t_desc = tcg_const_i32(desc);
2202
2203 fn(t_d, t_n, t_desc);
2204
2205 tcg_temp_free_i32(t_desc);
2206 tcg_temp_free_ptr(t_d);
2207 tcg_temp_free_ptr(t_n);
2208 return true;
2209}
2210
2211static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2212{
2213 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2214}
2215
2216static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2217{
2218 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2219}
2220
2221static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2222{
2223 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2224}
2225
2226static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2227{
2228 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2229}
2230
2231static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2232{
2233 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2234}
2235
2236static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2237{
2238 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2239}
2240
2241static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2242{
2243 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2244}
2245
2246static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2247{
2248 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2249}
2250
2251static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2252{
2253 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2254}
2255
2256/*
2257 *** SVE Permute - Interleaving Group
2258 */
2259
2260static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2261{
2262 static gen_helper_gvec_3 * const fns[4] = {
2263 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2264 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2265 };
2266
2267 if (sve_access_check(s)) {
2268 unsigned vsz = vec_full_reg_size(s);
2269 unsigned high_ofs = high ? vsz / 2 : 0;
2270 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2271 vec_full_reg_offset(s, a->rn) + high_ofs,
2272 vec_full_reg_offset(s, a->rm) + high_ofs,
2273 vsz, vsz, 0, fns[a->esz]);
2274 }
2275 return true;
2276}
2277
2278static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2279 gen_helper_gvec_3 *fn)
2280{
2281 if (sve_access_check(s)) {
2282 unsigned vsz = vec_full_reg_size(s);
2283 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284 vec_full_reg_offset(s, a->rn),
2285 vec_full_reg_offset(s, a->rm),
2286 vsz, vsz, data, fn);
2287 }
2288 return true;
2289}
2290
2291static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2292{
2293 return do_zip(s, a, false);
2294}
2295
2296static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2297{
2298 return do_zip(s, a, true);
2299}
2300
2301static gen_helper_gvec_3 * const uzp_fns[4] = {
2302 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2303 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2304};
2305
2306static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2307{
2308 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2309}
2310
2311static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2312{
2313 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2314}
2315
2316static gen_helper_gvec_3 * const trn_fns[4] = {
2317 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2318 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2319};
2320
2321static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2322{
2323 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2324}
2325
2326static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2327{
2328 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2329}
2330
2331/*
2332 *** SVE Permute Vector - Predicated Group
2333 */
2334
2335static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2336{
2337 static gen_helper_gvec_3 * const fns[4] = {
2338 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2339 };
2340 return do_zpz_ool(s, a, fns[a->esz]);
2341}
2342
2343/* Call the helper that computes the ARM LastActiveElement pseudocode
2344 * function, scaled by the element size. This includes the not found
2345 * indication; e.g. not found for esz=3 is -8.
2346 */
2347static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2348{
 2349 /* Predicate sizes may be smaller than simd_desc can encode.  We
 2350 * cannot round up, as we do elsewhere, because we need the exact size.
 2351 */
2352 TCGv_ptr t_p = tcg_temp_new_ptr();
2353 TCGv_i32 t_desc;
2354 unsigned vsz = pred_full_reg_size(s);
2355 unsigned desc;
2356
2357 desc = vsz - 2;
2358 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2359
2360 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2361 t_desc = tcg_const_i32(desc);
2362
2363 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2364
2365 tcg_temp_free_i32(t_desc);
2366 tcg_temp_free_ptr(t_p);
2367}
2368
2369/* Increment LAST to the offset of the next element in the vector,
2370 * wrapping around to 0.
2371 */
2372static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373{
2374 unsigned vsz = vec_full_reg_size(s);
2375
2376 tcg_gen_addi_i32(last, last, 1 << esz);
2377 if (is_power_of_2(vsz)) {
2378 tcg_gen_andi_i32(last, last, vsz - 1);
2379 } else {
2380 TCGv_i32 max = tcg_const_i32(vsz);
2381 TCGv_i32 zero = tcg_const_i32(0);
2382 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2383 tcg_temp_free_i32(max);
2384 tcg_temp_free_i32(zero);
2385 }
2386}
2387
2388/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2389static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390{
2391 unsigned vsz = vec_full_reg_size(s);
2392
2393 if (is_power_of_2(vsz)) {
2394 tcg_gen_andi_i32(last, last, vsz - 1);
2395 } else {
2396 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2397 TCGv_i32 zero = tcg_const_i32(0);
2398 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2399 tcg_temp_free_i32(max);
2400 tcg_temp_free_i32(zero);
2401 }
2402}
2403
2404/* Load an unsigned element of ESZ from BASE+OFS. */
2405static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2406{
2407 TCGv_i64 r = tcg_temp_new_i64();
2408
2409 switch (esz) {
2410 case 0:
2411 tcg_gen_ld8u_i64(r, base, ofs);
2412 break;
2413 case 1:
2414 tcg_gen_ld16u_i64(r, base, ofs);
2415 break;
2416 case 2:
2417 tcg_gen_ld32u_i64(r, base, ofs);
2418 break;
2419 case 3:
2420 tcg_gen_ld_i64(r, base, ofs);
2421 break;
2422 default:
2423 g_assert_not_reached();
2424 }
2425 return r;
2426}
2427
2428/* Load an unsigned element of ESZ from RM[LAST]. */
2429static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2430 int rm, int esz)
2431{
2432 TCGv_ptr p = tcg_temp_new_ptr();
2433 TCGv_i64 r;
2434
 2435 /* Convert the offset within the vector into an offset within ENV.
 2436 * The final adjustment for the vector register base
 2437 * is added via a constant offset to the load.
 2438 */
2439#ifdef HOST_WORDS_BIGENDIAN
2440 /* Adjust for element ordering. See vec_reg_offset. */
2441 if (esz < 3) {
2442 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2443 }
2444#endif
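/* E.g. (illustrative): for esz == 0 on a big-endian host, element 0
 * lives at byte 7 of its 64-bit unit, so the index is XORed with
 * 8 - (1 << 0) = 7; esz == 3 needs no adjustment and is skipped above.
 */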
2445 tcg_gen_ext_i32_ptr(p, last);
2446 tcg_gen_add_ptr(p, p, cpu_env);
2447
2448 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2449 tcg_temp_free_ptr(p);
2450
2451 return r;
2452}
2453
2454/* Compute CLAST for a Zreg. */
2455static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2456{
2457 TCGv_i32 last;
2458 TCGLabel *over;
2459 TCGv_i64 ele;
2460 unsigned vsz, esz = a->esz;
2461
2462 if (!sve_access_check(s)) {
2463 return true;
2464 }
2465
2466 last = tcg_temp_local_new_i32();
2467 over = gen_new_label();
2468
2469 find_last_active(s, last, esz, a->pg);
2470
2471 /* There is of course no movcond for a 2048-bit vector,
2472 * so we must branch over the actual store.
2473 */
2474 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2475
2476 if (!before) {
2477 incr_last_active(s, last, esz);
2478 }
2479
2480 ele = load_last_active(s, last, a->rm, esz);
2481 tcg_temp_free_i32(last);
2482
2483 vsz = vec_full_reg_size(s);
2484 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2485 tcg_temp_free_i64(ele);
2486
2487 /* If this insn used MOVPRFX, we may need a second move. */
2488 if (a->rd != a->rn) {
2489 TCGLabel *done = gen_new_label();
2490 tcg_gen_br(done);
2491
2492 gen_set_label(over);
2493 do_mov_z(s, a->rd, a->rn);
2494
2495 gen_set_label(done);
2496 } else {
2497 gen_set_label(over);
2498 }
2499 return true;
2500}
2501
2502static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2503{
2504 return do_clast_vector(s, a, false);
2505}
2506
2507static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2508{
2509 return do_clast_vector(s, a, true);
2510}
2511
2512/* Compute CLAST for a scalar. */
2513static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2514 bool before, TCGv_i64 reg_val)
2515{
2516 TCGv_i32 last = tcg_temp_new_i32();
2517 TCGv_i64 ele, cmp, zero;
2518
2519 find_last_active(s, last, esz, pg);
2520
2521 /* Extend the original value of last prior to incrementing. */
2522 cmp = tcg_temp_new_i64();
2523 tcg_gen_ext_i32_i64(cmp, last);
2524
2525 if (!before) {
2526 incr_last_active(s, last, esz);
2527 }
2528
2529 /* The conceit here is that while last < 0 indicates not found, after
2530 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2531 * from which we can load garbage. We then discard the garbage with
2532 * a conditional move.
2533 */
2534 ele = load_last_active(s, last, rm, esz);
2535 tcg_temp_free_i32(last);
2536
2537 zero = tcg_const_i64(0);
2538 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2539
2540 tcg_temp_free_i64(zero);
2541 tcg_temp_free_i64(cmp);
2542 tcg_temp_free_i64(ele);
2543}
2544
2545/* Compute CLAST for a Vreg. */
2546static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2547{
2548 if (sve_access_check(s)) {
2549 int esz = a->esz;
2550 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2551 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2552
2553 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2554 write_fp_dreg(s, a->rd, reg);
2555 tcg_temp_free_i64(reg);
2556 }
2557 return true;
2558}
2559
2560static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2561{
2562 return do_clast_fp(s, a, false);
2563}
2564
2565static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2566{
2567 return do_clast_fp(s, a, true);
2568}
2569
2570/* Compute CLAST for a Xreg. */
2571static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2572{
2573 TCGv_i64 reg;
2574
2575 if (!sve_access_check(s)) {
2576 return true;
2577 }
2578
2579 reg = cpu_reg(s, a->rd);
2580 switch (a->esz) {
2581 case 0:
2582 tcg_gen_ext8u_i64(reg, reg);
2583 break;
2584 case 1:
2585 tcg_gen_ext16u_i64(reg, reg);
2586 break;
2587 case 2:
2588 tcg_gen_ext32u_i64(reg, reg);
2589 break;
2590 case 3:
2591 break;
2592 default:
2593 g_assert_not_reached();
2594 }
2595
2596 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2597 return true;
2598}
2599
2600static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2601{
2602 return do_clast_general(s, a, false);
2603}
2604
2605static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2606{
2607 return do_clast_general(s, a, true);
2608}
2609
2610/* Compute LAST for a scalar. */
2611static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2612 int pg, int rm, bool before)
2613{
2614 TCGv_i32 last = tcg_temp_new_i32();
2615 TCGv_i64 ret;
2616
2617 find_last_active(s, last, esz, pg);
2618 if (before) {
2619 wrap_last_active(s, last, esz);
2620 } else {
2621 incr_last_active(s, last, esz);
2622 }
2623
2624 ret = load_last_active(s, last, rm, esz);
2625 tcg_temp_free_i32(last);
2626 return ret;
2627}
2628
2629/* Compute LAST for a Vreg. */
2630static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631{
2632 if (sve_access_check(s)) {
2633 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2634 write_fp_dreg(s, a->rd, val);
2635 tcg_temp_free_i64(val);
2636 }
2637 return true;
2638}
2639
2640static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2641{
2642 return do_last_fp(s, a, false);
2643}
2644
2645static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646{
2647 return do_last_fp(s, a, true);
2648}
2649
2650/* Compute LAST for a Xreg. */
2651static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2652{
2653 if (sve_access_check(s)) {
2654 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2655 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2656 tcg_temp_free_i64(val);
2657 }
2658 return true;
2659}
2660
2661static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2662{
2663 return do_last_general(s, a, false);
2664}
2665
2666static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2667{
2668 return do_last_general(s, a, true);
2669}
2670
2671static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2672{
2673 if (sve_access_check(s)) {
2674 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2675 }
2676 return true;
2677}
2678
2679static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2680{
2681 if (sve_access_check(s)) {
2682 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2683 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2684 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2685 tcg_temp_free_i64(t);
2686 }
2687 return true;
2688}
2689
2690static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691{
2692 static gen_helper_gvec_3 * const fns[4] = {
2693 NULL,
2694 gen_helper_sve_revb_h,
2695 gen_helper_sve_revb_s,
2696 gen_helper_sve_revb_d,
2697 };
2698 return do_zpz_ool(s, a, fns[a->esz]);
2699}
2700
2701static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702{
2703 static gen_helper_gvec_3 * const fns[4] = {
2704 NULL,
2705 NULL,
2706 gen_helper_sve_revh_s,
2707 gen_helper_sve_revh_d,
2708 };
2709 return do_zpz_ool(s, a, fns[a->esz]);
2710}
2711
2712static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2713{
2714 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2715}
2716
2717static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2718{
2719 static gen_helper_gvec_3 * const fns[4] = {
2720 gen_helper_sve_rbit_b,
2721 gen_helper_sve_rbit_h,
2722 gen_helper_sve_rbit_s,
2723 gen_helper_sve_rbit_d,
2724 };
2725 return do_zpz_ool(s, a, fns[a->esz]);
2726}
2727
2728static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2729{
2730 if (sve_access_check(s)) {
2731 unsigned vsz = vec_full_reg_size(s);
2732 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2733 vec_full_reg_offset(s, a->rn),
2734 vec_full_reg_offset(s, a->rm),
2735 pred_full_reg_offset(s, a->pg),
2736 vsz, vsz, a->esz, gen_helper_sve_splice);
2737 }
2738 return true;
2739}
2740
2741/*
2742 *** SVE Integer Compare - Vectors Group
2743 */
2744
2745static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2746 gen_helper_gvec_flags_4 *gen_fn)
2747{
2748 TCGv_ptr pd, zn, zm, pg;
2749 unsigned vsz;
2750 TCGv_i32 t;
2751
2752 if (gen_fn == NULL) {
2753 return false;
2754 }
2755 if (!sve_access_check(s)) {
2756 return true;
2757 }
2758
2759 vsz = vec_full_reg_size(s);
2760 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2761 pd = tcg_temp_new_ptr();
2762 zn = tcg_temp_new_ptr();
2763 zm = tcg_temp_new_ptr();
2764 pg = tcg_temp_new_ptr();
2765
2766 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2767 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2768 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2769 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2770
2771 gen_fn(t, pd, zn, zm, pg, t);
2772
2773 tcg_temp_free_ptr(pd);
2774 tcg_temp_free_ptr(zn);
2775 tcg_temp_free_ptr(zm);
2776 tcg_temp_free_ptr(pg);
2777
2778 do_pred_flags(t);
2779
2780 tcg_temp_free_i32(t);
2781 return true;
2782}
2783
2784#define DO_PPZZ(NAME, name) \
2785static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2786 uint32_t insn) \
2787{ \
2788 static gen_helper_gvec_flags_4 * const fns[4] = { \
2789 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2790 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2791 }; \
2792 return do_ppzz_flags(s, a, fns[a->esz]); \
2793}
2794
2795DO_PPZZ(CMPEQ, cmpeq)
2796DO_PPZZ(CMPNE, cmpne)
2797DO_PPZZ(CMPGT, cmpgt)
2798DO_PPZZ(CMPGE, cmpge)
2799DO_PPZZ(CMPHI, cmphi)
2800DO_PPZZ(CMPHS, cmphs)
2801
2802#undef DO_PPZZ
2803
2804#define DO_PPZW(NAME, name) \
2805static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2806 uint32_t insn) \
2807{ \
2808 static gen_helper_gvec_flags_4 * const fns[4] = { \
2809 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2810 gen_helper_sve_##name##_ppzw_s, NULL \
2811 }; \
2812 return do_ppzz_flags(s, a, fns[a->esz]); \
2813}
2814
2815DO_PPZW(CMPEQ, cmpeq)
2816DO_PPZW(CMPNE, cmpne)
2817DO_PPZW(CMPGT, cmpgt)
2818DO_PPZW(CMPGE, cmpge)
2819DO_PPZW(CMPHI, cmphi)
2820DO_PPZW(CMPHS, cmphs)
2821DO_PPZW(CMPLT, cmplt)
2822DO_PPZW(CMPLE, cmple)
2823DO_PPZW(CMPLO, cmplo)
2824DO_PPZW(CMPLS, cmpls)
2825
2826#undef DO_PPZW
2827
2828/*
2829 *** SVE Integer Compare - Immediate Groups
2830 */
2831
2832static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2833 gen_helper_gvec_flags_3 *gen_fn)
2834{
2835 TCGv_ptr pd, zn, pg;
2836 unsigned vsz;
2837 TCGv_i32 t;
2838
2839 if (gen_fn == NULL) {
2840 return false;
2841 }
2842 if (!sve_access_check(s)) {
2843 return true;
2844 }
2845
2846 vsz = vec_full_reg_size(s);
2847 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2848 pd = tcg_temp_new_ptr();
2849 zn = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2851
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2855
2856 gen_fn(t, pd, zn, pg, t);
2857
2858 tcg_temp_free_ptr(pd);
2859 tcg_temp_free_ptr(zn);
2860 tcg_temp_free_ptr(pg);
2861
2862 do_pred_flags(t);
2863
2864 tcg_temp_free_i32(t);
2865 return true;
2866}
2867
2868#define DO_PPZI(NAME, name) \
2869static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2870 uint32_t insn) \
2871{ \
2872 static gen_helper_gvec_flags_3 * const fns[4] = { \
2873 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2874 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2875 }; \
2876 return do_ppzi_flags(s, a, fns[a->esz]); \
2877}
2878
2879DO_PPZI(CMPEQ, cmpeq)
2880DO_PPZI(CMPNE, cmpne)
2881DO_PPZI(CMPGT, cmpgt)
2882DO_PPZI(CMPGE, cmpge)
2883DO_PPZI(CMPHI, cmphi)
2884DO_PPZI(CMPHS, cmphs)
2885DO_PPZI(CMPLT, cmplt)
2886DO_PPZI(CMPLE, cmple)
2887DO_PPZI(CMPLO, cmplo)
2888DO_PPZI(CMPLS, cmpls)
2889
2890#undef DO_PPZI
2891
2892/*
2893 *** SVE Partition Break Group
2894 */
2895
2896static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2897 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2898{
2899 if (!sve_access_check(s)) {
2900 return true;
2901 }
2902
2903 unsigned vsz = pred_full_reg_size(s);
2904
 2905 /* Predicate sizes may be smaller than simd_desc can encode. */
2906 TCGv_ptr d = tcg_temp_new_ptr();
2907 TCGv_ptr n = tcg_temp_new_ptr();
2908 TCGv_ptr m = tcg_temp_new_ptr();
2909 TCGv_ptr g = tcg_temp_new_ptr();
2910 TCGv_i32 t = tcg_const_i32(vsz - 2);
2911
2912 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2913 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2914 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2915 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2916
2917 if (a->s) {
2918 fn_s(t, d, n, m, g, t);
2919 do_pred_flags(t);
2920 } else {
2921 fn(d, n, m, g, t);
2922 }
2923 tcg_temp_free_ptr(d);
2924 tcg_temp_free_ptr(n);
2925 tcg_temp_free_ptr(m);
2926 tcg_temp_free_ptr(g);
2927 tcg_temp_free_i32(t);
2928 return true;
2929}
2930
2931static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2932 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2933{
2934 if (!sve_access_check(s)) {
2935 return true;
2936 }
2937
2938 unsigned vsz = pred_full_reg_size(s);
2939
 2940 /* Predicate sizes may be smaller than simd_desc can encode. */
2941 TCGv_ptr d = tcg_temp_new_ptr();
2942 TCGv_ptr n = tcg_temp_new_ptr();
2943 TCGv_ptr g = tcg_temp_new_ptr();
2944 TCGv_i32 t = tcg_const_i32(vsz - 2);
2945
2946 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2947 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2948 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2949
2950 if (a->s) {
2951 fn_s(t, d, n, g, t);
2952 do_pred_flags(t);
2953 } else {
2954 fn(d, n, g, t);
2955 }
2956 tcg_temp_free_ptr(d);
2957 tcg_temp_free_ptr(n);
2958 tcg_temp_free_ptr(g);
2959 tcg_temp_free_i32(t);
2960 return true;
2961}
2962
2963static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2964{
2965 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2966}
2967
2968static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2969{
2970 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2971}
2972
2973static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2974{
2975 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2976}
2977
2978static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2979{
2980 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2981}
2982
2983static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2984{
2985 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2986}
2987
2988static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2989{
2990 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2991}
2992
2993static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2994{
2995 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2996}
2997
2998/*
2999 *** SVE Predicate Count Group
3000 */
3001
3002static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3003{
3004 unsigned psz = pred_full_reg_size(s);
3005
3006 if (psz <= 8) {
3007 uint64_t psz_mask;
3008
3009 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3010 if (pn != pg) {
3011 TCGv_i64 g = tcg_temp_new_i64();
3012 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3013 tcg_gen_and_i64(val, val, g);
3014 tcg_temp_free_i64(g);
3015 }
3016
 3017 /* Trim pred_esz_masks to the actual predicate size, simply to
 3018 * reduce the size of the constant and of the code generated here.
 3019 */
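/* For reference (assumed values, following the one-bit-per-byte SVE
 * predicate layout): pred_esz_masks[0..3] should be of the form
 * 0xffff...ffff, 0x5555...5555, 0x1111...1111 and 0x0101...0101,
 * keeping only the flag bit of each element.
 */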
3020 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3021 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3022
3023 tcg_gen_ctpop_i64(val, val);
3024 } else {
3025 TCGv_ptr t_pn = tcg_temp_new_ptr();
3026 TCGv_ptr t_pg = tcg_temp_new_ptr();
3027 unsigned desc;
3028 TCGv_i32 t_desc;
3029
3030 desc = psz - 2;
3031 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3032
3033 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3034 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3035 t_desc = tcg_const_i32(desc);
3036
3037 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3038 tcg_temp_free_ptr(t_pn);
3039 tcg_temp_free_ptr(t_pg);
3040 tcg_temp_free_i32(t_desc);
3041 }
3042}
3043
3044static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3045{
3046 if (sve_access_check(s)) {
3047 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3048 }
3049 return true;
3050}
3051
3052static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3053 uint32_t insn)
3054{
3055 if (sve_access_check(s)) {
3056 TCGv_i64 reg = cpu_reg(s, a->rd);
3057 TCGv_i64 val = tcg_temp_new_i64();
3058
3059 do_cntp(s, val, a->esz, a->pg, a->pg);
3060 if (a->d) {
3061 tcg_gen_sub_i64(reg, reg, val);
3062 } else {
3063 tcg_gen_add_i64(reg, reg, val);
3064 }
3065 tcg_temp_free_i64(val);
3066 }
3067 return true;
3068}
3069
3070static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3071 uint32_t insn)
3072{
3073 if (a->esz == 0) {
3074 return false;
3075 }
3076 if (sve_access_check(s)) {
3077 unsigned vsz = vec_full_reg_size(s);
3078 TCGv_i64 val = tcg_temp_new_i64();
3079 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3080
3081 do_cntp(s, val, a->esz, a->pg, a->pg);
3082 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3083 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3084 }
3085 return true;
3086}
3087
3088static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3089 uint32_t insn)
3090{
3091 if (sve_access_check(s)) {
3092 TCGv_i64 reg = cpu_reg(s, a->rd);
3093 TCGv_i64 val = tcg_temp_new_i64();
3094
3095 do_cntp(s, val, a->esz, a->pg, a->pg);
3096 do_sat_addsub_32(reg, val, a->u, a->d);
3097 }
3098 return true;
3099}
3100
3101static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3102 uint32_t insn)
3103{
3104 if (sve_access_check(s)) {
3105 TCGv_i64 reg = cpu_reg(s, a->rd);
3106 TCGv_i64 val = tcg_temp_new_i64();
3107
3108 do_cntp(s, val, a->esz, a->pg, a->pg);
3109 do_sat_addsub_64(reg, val, a->u, a->d);
3110 }
3111 return true;
3112}
3113
3114static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3115 uint32_t insn)
3116{
3117 if (a->esz == 0) {
3118 return false;
3119 }
3120 if (sve_access_check(s)) {
3121 TCGv_i64 val = tcg_temp_new_i64();
3122 do_cntp(s, val, a->esz, a->pg, a->pg);
3123 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3124 }
3125 return true;
3126}
3127
3128/*
3129 *** SVE Integer Compare Scalars Group
3130 */
3131
3132static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3133{
3134 if (!sve_access_check(s)) {
3135 return true;
3136 }
3137
3138 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3139 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3140 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3141 TCGv_i64 cmp = tcg_temp_new_i64();
3142
3143 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3144 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3145 tcg_temp_free_i64(cmp);
3146
3147 /* VF = !NF & !CF. */
3148 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3149 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3150
3151 /* Both NF and VF actually look at bit 31. */
3152 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3153 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3154 return true;
3155}
3156
3157static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3158{
3159 if (!sve_access_check(s)) {
3160 return true;
3161 }
3162
3163 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3164 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3165 TCGv_i64 t0 = tcg_temp_new_i64();
3166 TCGv_i64 t1 = tcg_temp_new_i64();
3167 TCGv_i32 t2, t3;
3168 TCGv_ptr ptr;
3169 unsigned desc, vsz = vec_full_reg_size(s);
3170 TCGCond cond;
3171
3172 if (!a->sf) {
3173 if (a->u) {
3174 tcg_gen_ext32u_i64(op0, op0);
3175 tcg_gen_ext32u_i64(op1, op1);
3176 } else {
3177 tcg_gen_ext32s_i64(op0, op0);
3178 tcg_gen_ext32s_i64(op1, op1);
3179 }
3180 }
3181
 3182 /* For the helper, compress the different conditions into a computation
 3183 * of the number of iterations for which the condition is true.
 3184 *
 3185 * This is slightly complicated by the comparison 0 <= UINT64_MAX, which
 3186 * nominally yields 2**64 iterations, overflowing to 0.  Of course,
 3187 * predicate registers aren't that large, so any value >= the predicate size is sufficient.
3188 */
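/* Worked example (illustrative): for WHILELT (signed, !eq) with
 * op0 == 3 and op1 == 7, t0 becomes MIN(7 - 3, vsz) = 4 active
 * elements; the movcond below zeroes t0 if the initial comparison
 * already fails, and the a->eq case (e.g. WHILELE) adds one more
 * iteration.
 */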
3189 tcg_gen_sub_i64(t0, op1, op0);
3190
3191 /* t0 = MIN(op1 - op0, vsz). */
3192 tcg_gen_movi_i64(t1, vsz);
3193 tcg_gen_umin_i64(t0, t0, t1);
3194 if (a->eq) {
3195 /* Equality means one more iteration. */
3196 tcg_gen_addi_i64(t0, t0, 1);
3197 }
3198
3199 /* t0 = (condition true ? t0 : 0). */
3200 cond = (a->u
3201 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3202 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3203 tcg_gen_movi_i64(t1, 0);
3204 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3205
3206 t2 = tcg_temp_new_i32();
3207 tcg_gen_extrl_i64_i32(t2, t0);
3208 tcg_temp_free_i64(t0);
3209 tcg_temp_free_i64(t1);
3210
3211 desc = (vsz / 8) - 2;
3212 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3213 t3 = tcg_const_i32(desc);
3214
3215 ptr = tcg_temp_new_ptr();
3216 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3217
3218 gen_helper_sve_while(t2, ptr, t2, t3);
3219 do_pred_flags(t2);
3220
3221 tcg_temp_free_ptr(ptr);
3222 tcg_temp_free_i32(t2);
3223 tcg_temp_free_i32(t3);
3224 return true;
3225}
3226
3227/*
3228 *** SVE Integer Wide Immediate - Unpredicated Group
3229 */
3230
3231static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3232{
3233 if (a->esz == 0) {
3234 return false;
3235 }
3236 if (sve_access_check(s)) {
3237 unsigned vsz = vec_full_reg_size(s);
3238 int dofs = vec_full_reg_offset(s, a->rd);
3239 uint64_t imm;
3240
3241 /* Decode the VFP immediate. */
3242 imm = vfp_expand_imm(a->esz, a->imm);
3243 imm = dup_const(a->esz, imm);
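/* E.g. (illustrative): FDUP Zd.H, #1.0 gives imm == 0x3c00, and
 * dup_const replicates it to 0x3c003c003c003c00 for the 64-bit
 * splat below.
 */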
3244
3245 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3246 }
3247 return true;
3248}
3249
3250static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3251{
3252 if (a->esz == 0 && extract32(insn, 13, 1)) {
3253 return false;
3254 }
3255 if (sve_access_check(s)) {
3256 unsigned vsz = vec_full_reg_size(s);
3257 int dofs = vec_full_reg_offset(s, a->rd);
3258
3259 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3260 }
3261 return true;
3262}
3263
3264static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3265{
3266 if (a->esz == 0 && extract32(insn, 13, 1)) {
3267 return false;
3268 }
3269 if (sve_access_check(s)) {
3270 unsigned vsz = vec_full_reg_size(s);
3271 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3272 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3273 }
3274 return true;
3275}
3276
3277static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3278{
3279 a->imm = -a->imm;
3280 return trans_ADD_zzi(s, a, insn);
3281}
3282
3283static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3284{
3285 static const GVecGen2s op[4] = {
3286 { .fni8 = tcg_gen_vec_sub8_i64,
3287 .fniv = tcg_gen_sub_vec,
3288 .fno = gen_helper_sve_subri_b,
3289 .opc = INDEX_op_sub_vec,
3290 .vece = MO_8,
3291 .scalar_first = true },
3292 { .fni8 = tcg_gen_vec_sub16_i64,
3293 .fniv = tcg_gen_sub_vec,
3294 .fno = gen_helper_sve_subri_h,
3295 .opc = INDEX_op_sub_vec,
3296 .vece = MO_16,
3297 .scalar_first = true },
3298 { .fni4 = tcg_gen_sub_i32,
3299 .fniv = tcg_gen_sub_vec,
3300 .fno = gen_helper_sve_subri_s,
3301 .opc = INDEX_op_sub_vec,
3302 .vece = MO_32,
3303 .scalar_first = true },
3304 { .fni8 = tcg_gen_sub_i64,
3305 .fniv = tcg_gen_sub_vec,
3306 .fno = gen_helper_sve_subri_d,
3307 .opc = INDEX_op_sub_vec,
3308 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3309 .vece = MO_64,
3310 .scalar_first = true }
3311 };
3312
3313 if (a->esz == 0 && extract32(insn, 13, 1)) {
3314 return false;
3315 }
3316 if (sve_access_check(s)) {
3317 unsigned vsz = vec_full_reg_size(s);
3318 TCGv_i64 c = tcg_const_i64(a->imm);
3319 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3320 vec_full_reg_offset(s, a->rn),
3321 vsz, vsz, c, &op[a->esz]);
3322 tcg_temp_free_i64(c);
3323 }
3324 return true;
3325}
3326
3327static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3328{
3329 if (sve_access_check(s)) {
3330 unsigned vsz = vec_full_reg_size(s);
3331 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3332 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3333 }
3334 return true;
3335}
3336
3337static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3338 bool u, bool d)
3339{
3340 if (a->esz == 0 && extract32(insn, 13, 1)) {
3341 return false;
3342 }
3343 if (sve_access_check(s)) {
3344 TCGv_i64 val = tcg_const_i64(a->imm);
3345 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3346 tcg_temp_free_i64(val);
3347 }
3348 return true;
3349}
3350
3351static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3352{
3353 return do_zzi_sat(s, a, insn, false, false);
3354}
3355
3356static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3357{
3358 return do_zzi_sat(s, a, insn, true, false);
3359}
3360
3361static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3362{
3363 return do_zzi_sat(s, a, insn, false, true);
3364}
3365
3366static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3367{
3368 return do_zzi_sat(s, a, insn, true, true);
3369}
3370
3371static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3372{
3373 if (sve_access_check(s)) {
3374 unsigned vsz = vec_full_reg_size(s);
3375 TCGv_i64 c = tcg_const_i64(a->imm);
3376
3377 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3378 vec_full_reg_offset(s, a->rn),
3379 c, vsz, vsz, 0, fn);
3380 tcg_temp_free_i64(c);
3381 }
3382 return true;
3383}
3384
3385#define DO_ZZI(NAME, name) \
3386static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3387 uint32_t insn) \
3388{ \
3389 static gen_helper_gvec_2i * const fns[4] = { \
3390 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3391 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3392 }; \
3393 return do_zzi_ool(s, a, fns[a->esz]); \
3394}
3395
3396DO_ZZI(SMAX, smax)
3397DO_ZZI(UMAX, umax)
3398DO_ZZI(SMIN, smin)
3399DO_ZZI(UMIN, umin)
3400
3401#undef DO_ZZI
3402
3403/*
3404 *** SVE Floating Point Multiply-Add Indexed Group
3405 */
3406
3407static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3408{
3409 static gen_helper_gvec_4_ptr * const fns[3] = {
3410 gen_helper_gvec_fmla_idx_h,
3411 gen_helper_gvec_fmla_idx_s,
3412 gen_helper_gvec_fmla_idx_d,
3413 };
3414
3415 if (sve_access_check(s)) {
3416 unsigned vsz = vec_full_reg_size(s);
3417 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3418 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3419 vec_full_reg_offset(s, a->rn),
3420 vec_full_reg_offset(s, a->rm),
3421 vec_full_reg_offset(s, a->ra),
3422 status, vsz, vsz, (a->index << 1) | a->sub,
3423 fns[a->esz - 1]);
3424 tcg_temp_free_ptr(status);
3425 }
3426 return true;
3427}
3428
3429/*
3430 *** SVE Floating Point Multiply Indexed Group
3431 */
3432
3433static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3434{
3435 static gen_helper_gvec_3_ptr * const fns[3] = {
3436 gen_helper_gvec_fmul_idx_h,
3437 gen_helper_gvec_fmul_idx_s,
3438 gen_helper_gvec_fmul_idx_d,
3439 };
3440
3441 if (sve_access_check(s)) {
3442 unsigned vsz = vec_full_reg_size(s);
3443 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3444 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3445 vec_full_reg_offset(s, a->rn),
3446 vec_full_reg_offset(s, a->rm),
3447 status, vsz, vsz, a->index, fns[a->esz - 1]);
3448 tcg_temp_free_ptr(status);
3449 }
3450 return true;
3451}
3452
3453/*
3454 *** SVE Floating Point Fast Reduction Group
3455 */
3456
3457typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3458 TCGv_ptr, TCGv_i32);
3459
3460static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3461 gen_helper_fp_reduce *fn)
3462{
3463 unsigned vsz = vec_full_reg_size(s);
3464 unsigned p2vsz = pow2ceil(vsz);
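/* The power-of-2 maxsz presumably lets the helper reduce in a balanced
 * binary tree, padding the tail with identity elements; e.g. a 48-byte
 * vector would be handled as a 64-byte reduction.
 */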
3465 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3466 TCGv_ptr t_zn, t_pg, status;
3467 TCGv_i64 temp;
3468
3469 temp = tcg_temp_new_i64();
3470 t_zn = tcg_temp_new_ptr();
3471 t_pg = tcg_temp_new_ptr();
3472
3473 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3474 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3475 status = get_fpstatus_ptr(a->esz == MO_16);
3476
3477 fn(temp, t_zn, t_pg, status, t_desc);
3478 tcg_temp_free_ptr(t_zn);
3479 tcg_temp_free_ptr(t_pg);
3480 tcg_temp_free_ptr(status);
3481 tcg_temp_free_i32(t_desc);
3482
3483 write_fp_dreg(s, a->rd, temp);
3484 tcg_temp_free_i64(temp);
3485}
3486
3487#define DO_VPZ(NAME, name) \
3488static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3489{ \
3490 static gen_helper_fp_reduce * const fns[3] = { \
3491 gen_helper_sve_##name##_h, \
3492 gen_helper_sve_##name##_s, \
3493 gen_helper_sve_##name##_d, \
3494 }; \
3495 if (a->esz == 0) { \
3496 return false; \
3497 } \
3498 if (sve_access_check(s)) { \
3499 do_reduce(s, a, fns[a->esz - 1]); \
3500 } \
3501 return true; \
3502}
3503
3504DO_VPZ(FADDV, faddv)
3505DO_VPZ(FMINNMV, fminnmv)
3506DO_VPZ(FMAXNMV, fmaxnmv)
3507DO_VPZ(FMINV, fminv)
3508DO_VPZ(FMAXV, fmaxv)
3509
3510/*
3511 *** SVE Floating Point Unary Operations - Unpredicated Group
3512 */
3513
3514static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3515{
3516 unsigned vsz = vec_full_reg_size(s);
3517 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3518
3519 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3520 vec_full_reg_offset(s, a->rn),
3521 status, vsz, vsz, 0, fn);
3522 tcg_temp_free_ptr(status);
3523}
3524
3525static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3526{
3527 static gen_helper_gvec_2_ptr * const fns[3] = {
3528 gen_helper_gvec_frecpe_h,
3529 gen_helper_gvec_frecpe_s,
3530 gen_helper_gvec_frecpe_d,
3531 };
3532 if (a->esz == 0) {
3533 return false;
3534 }
3535 if (sve_access_check(s)) {
3536 do_zz_fp(s, a, fns[a->esz - 1]);
3537 }
3538 return true;
3539}
3540
3541static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3542{
3543 static gen_helper_gvec_2_ptr * const fns[3] = {
3544 gen_helper_gvec_frsqrte_h,
3545 gen_helper_gvec_frsqrte_s,
3546 gen_helper_gvec_frsqrte_d,
3547 };
3548 if (a->esz == 0) {
3549 return false;
3550 }
3551 if (sve_access_check(s)) {
3552 do_zz_fp(s, a, fns[a->esz - 1]);
3553 }
3554 return true;
3555}
3556
3557/*
3558 *** SVE Floating Point Compare with Zero Group
3559 */
3560
3561static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3562 gen_helper_gvec_3_ptr *fn)
3563{
3564 unsigned vsz = vec_full_reg_size(s);
3565 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3566
3567 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3568 vec_full_reg_offset(s, a->rn),
3569 pred_full_reg_offset(s, a->pg),
3570 status, vsz, vsz, 0, fn);
3571 tcg_temp_free_ptr(status);
3572}
3573
3574#define DO_PPZ(NAME, name) \
3575static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3576{ \
3577 static gen_helper_gvec_3_ptr * const fns[3] = { \
3578 gen_helper_sve_##name##_h, \
3579 gen_helper_sve_##name##_s, \
3580 gen_helper_sve_##name##_d, \
3581 }; \
3582 if (a->esz == 0) { \
3583 return false; \
3584 } \
3585 if (sve_access_check(s)) { \
3586 do_ppz_fp(s, a, fns[a->esz - 1]); \
3587 } \
3588 return true; \
3589}
3590
3591DO_PPZ(FCMGE_ppz0, fcmge0)
3592DO_PPZ(FCMGT_ppz0, fcmgt0)
3593DO_PPZ(FCMLE_ppz0, fcmle0)
3594DO_PPZ(FCMLT_ppz0, fcmlt0)
3595DO_PPZ(FCMEQ_ppz0, fcmeq0)
3596DO_PPZ(FCMNE_ppz0, fcmne0)
3597
3598#undef DO_PPZ
3599
3600/*
3601 *** SVE Floating Point Accumulating Reduction Group
3602 */
3603
3604static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3605{
3606 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3607 TCGv_ptr, TCGv_ptr, TCGv_i32);
3608 static fadda_fn * const fns[3] = {
3609 gen_helper_sve_fadda_h,
3610 gen_helper_sve_fadda_s,
3611 gen_helper_sve_fadda_d,
3612 };
3613 unsigned vsz = vec_full_reg_size(s);
3614 TCGv_ptr t_rm, t_pg, t_fpst;
3615 TCGv_i64 t_val;
3616 TCGv_i32 t_desc;
3617
3618 if (a->esz == 0) {
3619 return false;
3620 }
3621 if (!sve_access_check(s)) {
3622 return true;
3623 }
3624
3625 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3626 t_rm = tcg_temp_new_ptr();
3627 t_pg = tcg_temp_new_ptr();
3628 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3629 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3630 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3631 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3632
3633 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3634
3635 tcg_temp_free_i32(t_desc);
3636 tcg_temp_free_ptr(t_fpst);
3637 tcg_temp_free_ptr(t_pg);
3638 tcg_temp_free_ptr(t_rm);
3639
3640 write_fp_dreg(s, a->rd, t_val);
3641 tcg_temp_free_i64(t_val);
3642 return true;
3643}
3644
3645/*
3646 *** SVE Floating Point Arithmetic - Unpredicated Group
3647 */
3648
3649static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3650 gen_helper_gvec_3_ptr *fn)
3651{
3652 if (fn == NULL) {
3653 return false;
3654 }
3655 if (sve_access_check(s)) {
3656 unsigned vsz = vec_full_reg_size(s);
3657 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3658 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3659 vec_full_reg_offset(s, a->rn),
3660 vec_full_reg_offset(s, a->rm),
3661 status, vsz, vsz, 0, fn);
3662 tcg_temp_free_ptr(status);
3663 }
3664 return true;
3665}
3666
3667
3668#define DO_FP3(NAME, name) \
3669static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3670{ \
3671 static gen_helper_gvec_3_ptr * const fns[4] = { \
3672 NULL, gen_helper_gvec_##name##_h, \
3673 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3674 }; \
3675 return do_zzz_fp(s, a, fns[a->esz]); \
3676}
3677
3678DO_FP3(FADD_zzz, fadd)
3679DO_FP3(FSUB_zzz, fsub)
3680DO_FP3(FMUL_zzz, fmul)
3681DO_FP3(FTSMUL, ftsmul)
3682DO_FP3(FRECPS, recps)
3683DO_FP3(FRSQRTS, rsqrts)
3684
3685#undef DO_FP3
3686
3687/*
3688 *** SVE Floating Point Arithmetic - Predicated Group
3689 */
3690
3691static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3692 gen_helper_gvec_4_ptr *fn)
3693{
3694 if (fn == NULL) {
3695 return false;
3696 }
3697 if (sve_access_check(s)) {
3698 unsigned vsz = vec_full_reg_size(s);
3699 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3700 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3701 vec_full_reg_offset(s, a->rn),
3702 vec_full_reg_offset(s, a->rm),
3703 pred_full_reg_offset(s, a->pg),
3704 status, vsz, vsz, 0, fn);
3705 tcg_temp_free_ptr(status);
3706 }
3707 return true;
3708}
3709
3710#define DO_FP3(NAME, name) \
3711static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3712{ \
3713 static gen_helper_gvec_4_ptr * const fns[4] = { \
3714 NULL, gen_helper_sve_##name##_h, \
3715 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3716 }; \
3717 return do_zpzz_fp(s, a, fns[a->esz]); \
3718}
3719
3720DO_FP3(FADD_zpzz, fadd)
3721DO_FP3(FSUB_zpzz, fsub)
3722DO_FP3(FMUL_zpzz, fmul)
3723DO_FP3(FMIN_zpzz, fmin)
3724DO_FP3(FMAX_zpzz, fmax)
3725DO_FP3(FMINNM_zpzz, fminnum)
3726DO_FP3(FMAXNM_zpzz, fmaxnum)
3727DO_FP3(FABD, fabd)
3728DO_FP3(FSCALE, fscalbn)
3729DO_FP3(FDIV, fdiv)
3730DO_FP3(FMULX, fmulx)
3731
3732#undef DO_FP3
3733
3734typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3735 TCGv_i64, TCGv_ptr, TCGv_i32);
3736
3737static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3738 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3739{
3740 unsigned vsz = vec_full_reg_size(s);
3741 TCGv_ptr t_zd, t_zn, t_pg, status;
3742 TCGv_i32 desc;
3743
3744 t_zd = tcg_temp_new_ptr();
3745 t_zn = tcg_temp_new_ptr();
3746 t_pg = tcg_temp_new_ptr();
3747 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3748 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3749 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3750
3751 status = get_fpstatus_ptr(is_fp16);
3752 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3753 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3754
3755 tcg_temp_free_i32(desc);
3756 tcg_temp_free_ptr(status);
3757 tcg_temp_free_ptr(t_pg);
3758 tcg_temp_free_ptr(t_zn);
3759 tcg_temp_free_ptr(t_zd);
3760}
3761
3762static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3763 gen_helper_sve_fp2scalar *fn)
3764{
3765 TCGv_i64 temp = tcg_const_i64(imm);
3766 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3767 tcg_temp_free_i64(temp);
3768}
3769
3770#define DO_FP_IMM(NAME, name, const0, const1) \
3771static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
3772 uint32_t insn) \
3773{ \
3774 static gen_helper_sve_fp2scalar * const fns[3] = { \
3775 gen_helper_sve_##name##_h, \
3776 gen_helper_sve_##name##_s, \
3777 gen_helper_sve_##name##_d \
3778 }; \
3779 static uint64_t const val[3][2] = { \
3780 { float16_##const0, float16_##const1 }, \
3781 { float32_##const0, float32_##const1 }, \
3782 { float64_##const0, float64_##const1 }, \
3783 }; \
3784 if (a->esz == 0) { \
3785 return false; \
3786 } \
3787 if (sve_access_check(s)) { \
3788 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3789 } \
3790 return true; \
3791}
3792
3793#define float16_two make_float16(0x4000)
3794#define float32_two make_float32(0x40000000)
3795#define float64_two make_float64(0x4000000000000000ULL)
3796
3797DO_FP_IMM(FADD, fadds, half, one)
3798DO_FP_IMM(FSUB, fsubs, half, one)
3799DO_FP_IMM(FMUL, fmuls, half, two)
3800DO_FP_IMM(FSUBR, fsubrs, half, one)
3801DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3802DO_FP_IMM(FMINNM, fminnms, zero, one)
3803DO_FP_IMM(FMAX, fmaxs, zero, one)
3804DO_FP_IMM(FMIN, fmins, zero, one)
3805
3806#undef DO_FP_IMM
3807
3808static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3809 gen_helper_gvec_4_ptr *fn)
3810{
3811 if (fn == NULL) {
3812 return false;
3813 }
3814 if (sve_access_check(s)) {
3815 unsigned vsz = vec_full_reg_size(s);
3816 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3817 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn),
3819 vec_full_reg_offset(s, a->rm),
3820 pred_full_reg_offset(s, a->pg),
3821 status, vsz, vsz, 0, fn);
3822 tcg_temp_free_ptr(status);
3823 }
3824 return true;
3825}
3826
3827#define DO_FPCMP(NAME, name) \
3828static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
3829 uint32_t insn) \
3830{ \
3831 static gen_helper_gvec_4_ptr * const fns[4] = { \
3832 NULL, gen_helper_sve_##name##_h, \
3833 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3834 }; \
3835 return do_fp_cmp(s, a, fns[a->esz]); \
3836}
3837
3838DO_FPCMP(FCMGE, fcmge)
3839DO_FPCMP(FCMGT, fcmgt)
3840DO_FPCMP(FCMEQ, fcmeq)
3841DO_FPCMP(FCMNE, fcmne)
3842DO_FPCMP(FCMUO, fcmuo)
3843DO_FPCMP(FACGE, facge)
3844DO_FPCMP(FACGT, facgt)
3845
3846#undef DO_FPCMP
3847
3848typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3849
3850static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3851{
3852 if (fn == NULL) {
3853 return false;
3854 }
3855 if (!sve_access_check(s)) {
3856 return true;
3857 }
3858
3859 unsigned vsz = vec_full_reg_size(s);
3860 unsigned desc;
3861 TCGv_i32 t_desc;
3862 TCGv_ptr pg = tcg_temp_new_ptr();
3863
3864 /* We would need 7 operands to pass these arguments "properly".
3865 * So we encode all the register numbers into the descriptor.
3866 */
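/* Sketch of the expected unpacking on the helper side (an assumption;
 * the helper itself is not shown here):
 *
 *     unsigned rd = extract32(simd_data(desc), 0, 5);
 *     unsigned rn = extract32(simd_data(desc), 5, 5);
 *     unsigned rm = extract32(simd_data(desc), 10, 5);
 *     unsigned ra = extract32(simd_data(desc), 15, 5);
 */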
3867 desc = deposit32(a->rd, 5, 5, a->rn);
3868 desc = deposit32(desc, 10, 5, a->rm);
3869 desc = deposit32(desc, 15, 5, a->ra);
3870 desc = simd_desc(vsz, vsz, desc);
3871
3872 t_desc = tcg_const_i32(desc);
3873 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3874 fn(cpu_env, pg, t_desc);
3875 tcg_temp_free_i32(t_desc);
3876 tcg_temp_free_ptr(pg);
3877 return true;
3878}
3879
3880#define DO_FMLA(NAME, name) \
3881static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3882{ \
3883 static gen_helper_sve_fmla * const fns[4] = { \
3884 NULL, gen_helper_sve_##name##_h, \
3885 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3886 }; \
3887 return do_fmla(s, a, fns[a->esz]); \
3888}
3889
3890DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3891DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3892DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3893DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3894
3895#undef DO_FMLA
3896
3897/*
3898 *** SVE Floating Point Unary Operations Predicated Group
3899 */
3900
3901static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3902 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3903{
3904 if (sve_access_check(s)) {
3905 unsigned vsz = vec_full_reg_size(s);
3906 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3907 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3908 vec_full_reg_offset(s, rn),
3909 pred_full_reg_offset(s, pg),
3910 status, vsz, vsz, 0, fn);
3911 tcg_temp_free_ptr(status);
3912 }
3913 return true;
3914}
3915
3916static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3917{
3918 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3919}
3920
3921static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3922{
3923 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3924}
3925
3926static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3927{
3928 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3929}
3930
3931static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3932{
3933 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3934}
3935
3936static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3937{
3938 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3939}
3940
3941static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3942{
3943 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3944}
3945
3946static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3947{
3948 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3949}
3950
3951static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3952{
3953 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3954}
3955
3956static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3957{
3958 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3959}
3960
3961static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3962{
3963 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3964}
3965
3966static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3967{
3968 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3969}
3970
3971static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3972{
3973 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3974}
3975
3976static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3977{
3978 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3979}
3980
3981static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3982{
3983 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3984}
3985
3986/*
3987 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3988 */
3989
3990/* Subroutine loading a vector or predicate register of LEN bytes
3991 * at cpu_env offset VOFS. The load begins at the address Rn + IMM.
3992 */
3993
3994static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3995 int rn, int imm)
3996{
3997 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3998 uint32_t len_remain = len % 8;
3999 uint32_t nparts = len / 8 + ctpop8(len_remain);
4000 int midx = get_mem_index(s);
4001 TCGv_i64 addr, t0, t1;
4002
4003 addr = tcg_temp_new_i64();
4004 t0 = tcg_temp_new_i64();
4005
4006 /* Note that unpredicated load/store of vector/predicate registers
4007 * are defined as a stream of bytes, which equates to little-endian
4008 * operations on larger quantities. There is no nice way to force
4009 * a little-endian load for aarch64_be-linux-user out of line.
4010 *
4011 * Attempt to keep code expansion to a minimum by limiting the
4012 * amount of unrolling done.
4013 */
4014 if (nparts <= 4) {
4015 int i;
4016
4017 for (i = 0; i < len_align; i += 8) {
4018 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4019 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4020 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4021 }
4022 } else {
4023 TCGLabel *loop = gen_new_label();
4024 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4025
4026 gen_set_label(loop);
4027
4028 /* Minimize the number of local temps that must be re-read from
4029 * the stack each iteration. Instead, re-compute values other
4030 * than the loop counter.
4031 */
4032 tp = tcg_temp_new_ptr();
4033 tcg_gen_addi_ptr(tp, i, imm);
4034 tcg_gen_extu_ptr_i64(addr, tp);
4035 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4036
4037 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4038
4039 tcg_gen_add_ptr(tp, cpu_env, i);
4040 tcg_gen_addi_ptr(i, i, 8);
4041 tcg_gen_st_i64(t0, tp, vofs);
4042 tcg_temp_free_ptr(tp);
4043
4044 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4045 tcg_temp_free_ptr(i);
4046 }
4047
4048 /* A predicate register load can be any multiple of 2 bytes.
4049 * Note that we still store a full 64-bit unit into cpu_env.
4050 */
4051 if (len_remain) {
4052 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4053
4054 switch (len_remain) {
4055 case 2:
4056 case 4:
4057 case 8:
4058 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4059 break;
4060
4061 case 6:
4062 t1 = tcg_temp_new_i64();
4063 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4064 tcg_gen_addi_i64(addr, addr, 4);
4065 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4066 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4067 tcg_temp_free_i64(t1);
4068 break;
4069
4070 default:
4071 g_assert_not_reached();
4072 }
4073 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4074 }
4075 tcg_temp_free_i64(addr);
4076 tcg_temp_free_i64(t0);
4077}
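
/* Worked example of the decomposition above (illustration only):
 * len == 22 (a predicate register for a 1408-bit vector) gives
 * len_align == 16, len_remain == 6 and nparts == 2 + 2 == 4, so the
 * aligned portion is unrolled as two 8-byte loads and the tail takes
 * the 4-byte + 2-byte path of the len_remain == 6 case.
 */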
4078
4079/* Similarly for stores. */
4080static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
4081 int rn, int imm)
4082{
4083 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4084 uint32_t len_remain = len % 8;
4085 uint32_t nparts = len / 8 + ctpop8(len_remain);
4086 int midx = get_mem_index(s);
4087 TCGv_i64 addr, t0;
4088
4089 addr = tcg_temp_new_i64();
4090 t0 = tcg_temp_new_i64();
4091
4092 /* Note that unpredicated load/store of vector/predicate registers
4093 * are defined as a stream of bytes, which equates to little-endian
4094 * operations on larger quantities. There is no nice way to force
4095 * a little-endian store for aarch64_be-linux-user out of line.
4096 *
4097 * Attempt to keep code expansion to a minimum by limiting the
4098 * amount of unrolling done.
4099 */
4100 if (nparts <= 4) {
4101 int i;
4102
4103 for (i = 0; i < len_align; i += 8) {
4104 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4105 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4106 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4107 }
4108 } else {
4109 TCGLabel *loop = gen_new_label();
4110 TCGv_ptr t2, i = tcg_const_local_ptr(0);
4111
4112 gen_set_label(loop);
4113
4114 t2 = tcg_temp_new_ptr();
4115 tcg_gen_add_ptr(t2, cpu_env, i);
4116 tcg_gen_ld_i64(t0, t2, vofs);
4117
4118 /* Minimize the number of local temps that must be re-read from
4119 * the stack each iteration. Instead, re-compute values other
4120 * than the loop counter.
4121 */
4122 tcg_gen_addi_ptr(t2, i, imm);
4123 tcg_gen_extu_ptr_i64(addr, t2);
4124 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4125 tcg_temp_free_ptr(t2);
4126
4127 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4128
4129 tcg_gen_addi_ptr(i, i, 8);
4130
4131 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4132 tcg_temp_free_ptr(i);
4133 }
4134
4135 /* A predicate register store can be any multiple of 2 bytes. */
4136 if (len_remain) {
4137 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4138 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4139
4140 switch (len_remain) {
4141 case 2:
4142 case 4:
4143 case 8:
4144 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4145 break;
4146
4147 case 6:
4148 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4149 tcg_gen_addi_i64(addr, addr, 4);
4150 tcg_gen_shri_i64(t0, t0, 32);
4151 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4152 break;
4153
4154 default:
4155 g_assert_not_reached();
4156 }
4157 }
4158 tcg_temp_free_i64(addr);
4159 tcg_temp_free_i64(t0);
4160}
4161
4162static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4163{
4164 if (sve_access_check(s)) {
4165 int size = vec_full_reg_size(s);
4166 int off = vec_full_reg_offset(s, a->rd);
4167 do_ldr(s, off, size, a->rn, a->imm * size);
4168 }
4169 return true;
4170}
4171
4172static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4173{
4174 if (sve_access_check(s)) {
4175 int size = pred_full_reg_size(s);
4176 int off = pred_full_reg_offset(s, a->rd);
4177 do_ldr(s, off, size, a->rn, a->imm * size);
4178 }
4179 return true;
4180}
4181
4182static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4183{
4184 if (sve_access_check(s)) {
4185 int size = vec_full_reg_size(s);
4186 int off = vec_full_reg_offset(s, a->rd);
4187 do_str(s, off, size, a->rn, a->imm * size);
4188 }
4189 return true;
4190}
4191
4192static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4193{
4194 if (sve_access_check(s)) {
4195 int size = pred_full_reg_size(s);
4196 int off = pred_full_reg_offset(s, a->rd);
4197 do_str(s, off, size, a->rn, a->imm * size);
4198 }
4199 return true;
4200}
4201
4202/*
4203 *** SVE Memory - Contiguous Load Group
4204 */
4205
4206/* The memory mode of the dtype. */
4207static const TCGMemOp dtype_mop[16] = {
4208 MO_UB, MO_UB, MO_UB, MO_UB,
4209 MO_SL, MO_UW, MO_UW, MO_UW,
4210 MO_SW, MO_SW, MO_UL, MO_UL,
4211 MO_SB, MO_SB, MO_SB, MO_Q
4212};
4213
4214#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4215
4216/* The vector element size of dtype. */
4217static const uint8_t dtype_esz[16] = {
4218 0, 1, 2, 3,
4219 3, 1, 2, 3,
4220 3, 2, 2, 3,
4221 3, 2, 1, 3
4222};
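
/* A worked decode of the two tables (informative): dtype == 4 is
 * LD1SW into .D elements, so dtype_mop[4] == MO_SL (sign-extended
 * 32-bit load), dtype_esz[4] == 3 (8-byte vector elements) and
 * dtype_msz(4) == 2 (4-byte memory accesses).
 */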
4223
4224static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4225 gen_helper_gvec_mem *fn)
4226{
4227 unsigned vsz = vec_full_reg_size(s);
4228 TCGv_ptr t_pg;
4229 TCGv_i32 desc;
4230
4231 /* For LD2..LD4 there are not enough arguments to pass all of
4232 * the registers as pointers, so encode the regno into the data
4233 * field. For consistency, do this even for LD1.
4234 */
4235 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4236 t_pg = tcg_temp_new_ptr();
4237
4238 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4239 fn(cpu_env, t_pg, addr, desc);
4240
4241 tcg_temp_free_ptr(t_pg);
4242 tcg_temp_free_i32(desc);
4243}
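
/* A sketch (not the real helpers, which live in sve_helper.c) of how
 * such a helper would unpack the descriptor built above, using the
 * standard tcg-gvec-desc accessors:
 *
 *   intptr_t oprsz = simd_oprsz(desc);  // vector length in bytes
 *   unsigned zt = simd_data(desc);      // register number from above
 *   void *vd = &env->vfp.zregs[zt];     // first destination register
 */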
4244
4245static void do_ld_zpa(DisasContext *s, int zt, int pg,
4246 TCGv_i64 addr, int dtype, int nreg)
4247{
4248 static gen_helper_gvec_mem * const fns[16][4] = {
4249 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4250 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4251 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4252 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4253 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4254
4255 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4256 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4257 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4258 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4259 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4260
4261 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4262 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4263 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4264 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4265 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4266
4267 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4268 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4269 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4270 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4271 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4272 };
4273 gen_helper_gvec_mem *fn = fns[dtype][nreg];
4274
4275 /* While there are holes in the table, they are not
4276 * accessible via the instruction encoding.
4277 */
4278 assert(fn != NULL);
4279 do_mem_zpa(s, zt, pg, addr, fn);
4280}
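
/* Reading the table above, for instance: dtype == 3 with nreg == 0
 * selects gen_helper_sve_ld1bdu_r, i.e. LD1B into .D elements with
 * zero-extension from 8 to 64 bits; only the rows where msz == esz
 * have LD2/LD3/LD4 entries.
 */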
4281
4282static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4283{
4284 if (a->rm == 31) {
4285 return false;
4286 }
4287 if (sve_access_check(s)) {
4288 TCGv_i64 addr = new_tmp_a64(s);
4289 /* Rm is scaled by the single memory element size, not by nreg. */
4290 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4291 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4292 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4293 }
4294 return true;
4295}
4296
4297static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4298{
4299 if (sve_access_check(s)) {
4300 int vsz = vec_full_reg_size(s);
4301 int elements = vsz >> dtype_esz[a->dtype];
4302 TCGv_i64 addr = new_tmp_a64(s);
4303
4304 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4305 (a->imm * elements * (a->nreg + 1))
4306 << dtype_msz(a->dtype));
4307 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4308 }
4309 return true;
4310}
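
/* Example of the immediate scaling (illustrative numbers): LD1D with
 * a 32-byte vector has elements == 32 >> 3 == 4, so imm == 1 produces
 * an offset of (1 * 4 * 1) << 3 == 32 bytes, i.e. exactly one vector
 * length, matching the "[<Xn>, #<imm>, MUL VL]" form.
 */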
4311
4312static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4313{
4314 static gen_helper_gvec_mem * const fns[16] = {
4315 gen_helper_sve_ldff1bb_r,
4316 gen_helper_sve_ldff1bhu_r,
4317 gen_helper_sve_ldff1bsu_r,
4318 gen_helper_sve_ldff1bdu_r,
4319
4320 gen_helper_sve_ldff1sds_r,
4321 gen_helper_sve_ldff1hh_r,
4322 gen_helper_sve_ldff1hsu_r,
4323 gen_helper_sve_ldff1hdu_r,
4324
4325 gen_helper_sve_ldff1hds_r,
4326 gen_helper_sve_ldff1hss_r,
4327 gen_helper_sve_ldff1ss_r,
4328 gen_helper_sve_ldff1sdu_r,
4329
4330 gen_helper_sve_ldff1bds_r,
4331 gen_helper_sve_ldff1bss_r,
4332 gen_helper_sve_ldff1bhs_r,
4333 gen_helper_sve_ldff1dd_r,
4334 };
4335
4336 if (sve_access_check(s)) {
4337 TCGv_i64 addr = new_tmp_a64(s);
4338 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4339 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4340 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4341 }
4342 return true;
4343}
4344
4345static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4346{
4347 static gen_helper_gvec_mem * const fns[16] = {
4348 gen_helper_sve_ldnf1bb_r,
4349 gen_helper_sve_ldnf1bhu_r,
4350 gen_helper_sve_ldnf1bsu_r,
4351 gen_helper_sve_ldnf1bdu_r,
4352
4353 gen_helper_sve_ldnf1sds_r,
4354 gen_helper_sve_ldnf1hh_r,
4355 gen_helper_sve_ldnf1hsu_r,
4356 gen_helper_sve_ldnf1hdu_r,
4357
4358 gen_helper_sve_ldnf1hds_r,
4359 gen_helper_sve_ldnf1hss_r,
4360 gen_helper_sve_ldnf1ss_r,
4361 gen_helper_sve_ldnf1sdu_r,
4362
4363 gen_helper_sve_ldnf1bds_r,
4364 gen_helper_sve_ldnf1bss_r,
4365 gen_helper_sve_ldnf1bhs_r,
4366 gen_helper_sve_ldnf1dd_r,
4367 };
4368
4369 if (sve_access_check(s)) {
4370 int vsz = vec_full_reg_size(s);
4371 int elements = vsz >> dtype_esz[a->dtype];
4372 int off = (a->imm * elements) << dtype_msz(a->dtype);
4373 TCGv_i64 addr = new_tmp_a64(s);
4374
4375 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4376 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4377 }
4378 return true;
4379}
4380
4381static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4382{
4383 static gen_helper_gvec_mem * const fns[4] = {
4384 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4385 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4386 };
4387 unsigned vsz = vec_full_reg_size(s);
4388 TCGv_ptr t_pg;
4389 TCGv_i32 desc;
4390
4391 /* Load the first quadword using the normal predicated load helpers. */
4392 desc = tcg_const_i32(simd_desc(16, 16, zt));
4393 t_pg = tcg_temp_new_ptr();
4394
4395 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4396 fns[msz](cpu_env, t_pg, addr, desc);
4397
4398 tcg_temp_free_ptr(t_pg);
4399 tcg_temp_free_i32(desc);
4400
4401 /* Replicate that first quadword. */
4402 if (vsz > 16) {
4403 unsigned dofs = vec_full_reg_offset(s, zt);
4404 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4405 }
4406}
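
/* For example, with a 48-byte vector the quadword just loaded at dofs
 * is copied into bytes 16..47; vece == 4 (128 bits) makes
 * tcg_gen_gvec_dup_mem splat whole 16-byte units.
 */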
4407
4408static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4409{
4410 if (a->rm == 31) {
4411 return false;
4412 }
4413 if (sve_access_check(s)) {
4414 int msz = dtype_msz(a->dtype);
4415 TCGv_i64 addr = new_tmp_a64(s);
4416 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4417 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4418 do_ldrq(s, a->rd, a->pg, addr, msz);
4419 }
4420 return true;
4421}
4422
4423static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4424{
4425 if (sve_access_check(s)) {
4426 TCGv_i64 addr = new_tmp_a64(s);
4427 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4428 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4429 }
4430 return true;
4431}
4432
4433/* Load and broadcast element. */
4434static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4435{
4436 if (!sve_access_check(s)) {
4437 return true;
4438 }
4439
4440 unsigned vsz = vec_full_reg_size(s);
4441 unsigned psz = pred_full_reg_size(s);
4442 unsigned esz = dtype_esz[a->dtype];
4443 TCGLabel *over = gen_new_label();
4444 TCGv_i64 temp;
4445
4446 /* If the guarding predicate has no bits set, no load occurs. */
4447 if (psz <= 8) {
4448 /* Reduce the pred_esz_masks value simply to reduce the
4449 * size of the code generated here.
4450 */
4451 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4452 temp = tcg_temp_new_i64();
4453 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4454 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4455 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4456 tcg_temp_free_i64(temp);
4457 } else {
4458 TCGv_i32 t32 = tcg_temp_new_i32();
4459 find_last_active(s, t32, esz, a->pg);
4460 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4461 tcg_temp_free_i32(t32);
4462 }
4463
4464 /* Load the data. */
4465 temp = tcg_temp_new_i64();
4466 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4467 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4468 s->be_data | dtype_mop[a->dtype]);
4469
4470 /* Broadcast to *all* elements. */
4471 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4472 vsz, vsz, temp);
4473 tcg_temp_free_i64(temp);
4474
4475 /* Zero the inactive elements. */
4476 gen_set_label(over);
4477 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4478 return true;
4479}
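
/* Note the control flow above: when no predicate element is active we
 * branch straight to "over", skipping the load, and do_movz_zpz then
 * zeroes every element, which is the architected result for this
 * zeroing-predicated load with an all-false predicate.
 */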
4480
4481static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4482 int msz, int esz, int nreg)
4483{
4484 static gen_helper_gvec_mem * const fn_single[4][4] = {
4485 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4486 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4487 { NULL, gen_helper_sve_st1hh_r,
4488 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4489 { NULL, NULL,
4490 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4491 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4492 };
4493 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4494 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4495 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4496 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4497 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4498 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4499 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4500 };
4501 gen_helper_gvec_mem *fn;
4502
4503 if (nreg == 0) {
4504 /* ST1 */
4505 fn = fn_single[msz][esz];
4506 } else {
4507 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4508 assert(msz == esz);
4509 fn = fn_multiple[nreg - 1][msz];
4510 }
4511 assert(fn != NULL);
4512 do_mem_zpa(s, zt, pg, addr, fn);
4513}
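
/* As with the loads: e.g. msz == 0, esz == 3 selects
 * gen_helper_sve_st1bd_r, ST1B from .D elements storing the low byte
 * of each 64-bit element. The NULL entries (msz > esz) are rejected
 * by the trans functions below before this table is consulted.
 */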
4514
4515static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4516{
4517 if (a->rm == 31 || a->msz > a->esz) {
4518 return false;
4519 }
4520 if (sve_access_check(s)) {
4521 TCGv_i64 addr = new_tmp_a64(s);
4522 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
4523 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4524 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4525 }
4526 return true;
4527}
4528
4529static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4530{
4531 if (a->msz > a->esz) {
4532 return false;
4533 }
4534 if (sve_access_check(s)) {
4535 int vsz = vec_full_reg_size(s);
4536 int elements = vsz >> a->esz;
4537 TCGv_i64 addr = new_tmp_a64(s);
4538
4539 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4540 (a->imm * elements * (a->nreg + 1)) << a->msz);
4541 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4542 }
4543 return true;
4544}
4545
4546/*
4547 *** SVE gather loads / scatter stores
4548 */
4549
4550static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4551 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4552{
4553 unsigned vsz = vec_full_reg_size(s);
4554 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4555 TCGv_ptr t_zm = tcg_temp_new_ptr();
4556 TCGv_ptr t_pg = tcg_temp_new_ptr();
4557 TCGv_ptr t_zt = tcg_temp_new_ptr();
4558
4559 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4560 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4561 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4562 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4563
4564 tcg_temp_free_ptr(t_zt);
4565 tcg_temp_free_ptr(t_zm);
4566 tcg_temp_free_ptr(t_pg);
4567 tcg_temp_free_i32(desc);
4568}
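
/* The "scale" value packed into the descriptor is interpreted by the
 * helpers as the left shift applied to each offset element: 0 for
 * unscaled offsets, or msz when the encoding requests offsets scaled
 * by the memory element size (hence "a->scale * a->msz" at the call
 * sites below).
 */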
4569
4570/* Indexed by [ff][xs][u][msz]. */
4571static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4572 { { { gen_helper_sve_ldbss_zsu,
4573 gen_helper_sve_ldhss_zsu,
4574 NULL, },
4575 { gen_helper_sve_ldbsu_zsu,
4576 gen_helper_sve_ldhsu_zsu,
4577 gen_helper_sve_ldssu_zsu, } },
4578 { { gen_helper_sve_ldbss_zss,
4579 gen_helper_sve_ldhss_zss,
4580 NULL, },
4581 { gen_helper_sve_ldbsu_zss,
4582 gen_helper_sve_ldhsu_zss,
4583 gen_helper_sve_ldssu_zss, } } },
4584
4585 { { { gen_helper_sve_ldffbss_zsu,
4586 gen_helper_sve_ldffhss_zsu,
4587 NULL, },
4588 { gen_helper_sve_ldffbsu_zsu,
4589 gen_helper_sve_ldffhsu_zsu,
4590 gen_helper_sve_ldffssu_zsu, } },
4591 { { gen_helper_sve_ldffbss_zss,
4592 gen_helper_sve_ldffhss_zss,
4593 NULL, },
4594 { gen_helper_sve_ldffbsu_zss,
4595 gen_helper_sve_ldffhsu_zss,
4596 gen_helper_sve_ldffssu_zss, } } }
4597};
4598
4599/* Note that we overload xs=2 to indicate 64-bit offset. */
4600static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4601 { { { gen_helper_sve_ldbds_zsu,
4602 gen_helper_sve_ldhds_zsu,
4603 gen_helper_sve_ldsds_zsu,
4604 NULL, },
4605 { gen_helper_sve_ldbdu_zsu,
4606 gen_helper_sve_ldhdu_zsu,
4607 gen_helper_sve_ldsdu_zsu,
4608 gen_helper_sve_ldddu_zsu, } },
4609 { { gen_helper_sve_ldbds_zss,
4610 gen_helper_sve_ldhds_zss,
4611 gen_helper_sve_ldsds_zss,
4612 NULL, },
4613 { gen_helper_sve_ldbdu_zss,
4614 gen_helper_sve_ldhdu_zss,
4615 gen_helper_sve_ldsdu_zss,
4616 gen_helper_sve_ldddu_zss, } },
4617 { { gen_helper_sve_ldbds_zd,
4618 gen_helper_sve_ldhds_zd,
4619 gen_helper_sve_ldsds_zd,
4620 NULL, },
4621 { gen_helper_sve_ldbdu_zd,
4622 gen_helper_sve_ldhdu_zd,
4623 gen_helper_sve_ldsdu_zd,
4624 gen_helper_sve_ldddu_zd, } } },
4625
4626 { { { gen_helper_sve_ldffbds_zsu,
4627 gen_helper_sve_ldffhds_zsu,
4628 gen_helper_sve_ldffsds_zsu,
4629 NULL, },
4630 { gen_helper_sve_ldffbdu_zsu,
4631 gen_helper_sve_ldffhdu_zsu,
4632 gen_helper_sve_ldffsdu_zsu,
4633 gen_helper_sve_ldffddu_zsu, } },
4634 { { gen_helper_sve_ldffbds_zss,
4635 gen_helper_sve_ldffhds_zss,
4636 gen_helper_sve_ldffsds_zss,
4637 NULL, },
4638 { gen_helper_sve_ldffbdu_zss,
4639 gen_helper_sve_ldffhdu_zss,
4640 gen_helper_sve_ldffsdu_zss,
4641 gen_helper_sve_ldffddu_zss, } },
4642 { { gen_helper_sve_ldffbds_zd,
4643 gen_helper_sve_ldffhds_zd,
4644 gen_helper_sve_ldffsds_zd,
4645 NULL, },
4646 { gen_helper_sve_ldffbdu_zd,
4647 gen_helper_sve_ldffhdu_zd,
4648 gen_helper_sve_ldffsdu_zd,
4649 gen_helper_sve_ldffddu_zd, } } }
4650};
4651
4652static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4653{
4654 gen_helper_gvec_mem_scatter *fn = NULL;
4655
4656 if (!sve_access_check(s)) {
4657 return true;
4658 }
4659
4660 switch (a->esz) {
4661 case MO_32:
4662 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4663 break;
4664 case MO_64:
4665 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4666 break;
4667 }
4668 assert(fn != NULL);
4669
4670 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4671 cpu_reg_sp(s, a->rn), fn);
4672 return true;
4673}
4674
4675static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4676{
4677 gen_helper_gvec_mem_scatter *fn = NULL;
4678 TCGv_i64 imm;
4679
4680 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4681 return false;
4682 }
4683 if (!sve_access_check(s)) {
4684 return true;
4685 }
4686
4687 switch (a->esz) {
4688 case MO_32:
4689 fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4690 break;
4691 case MO_64:
4692 fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4693 break;
4694 }
4695 assert(fn != NULL);
4696
4697 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4698 * by loading the immediate into the scalar parameter.
4699 */
4700 imm = tcg_const_i64(a->imm << a->msz);
4701 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4702 tcg_temp_free_i64(imm);
4703 return true;
4704}
4705
4706/* Indexed by [xs][msz]. */
4707static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
4708 { gen_helper_sve_stbs_zsu,
4709 gen_helper_sve_sths_zsu,
4710 gen_helper_sve_stss_zsu, },
4711 { gen_helper_sve_stbs_zss,
4712 gen_helper_sve_sths_zss,
4713 gen_helper_sve_stss_zss, },
4714};
4715
4716/* Note that we overload xs=2 to indicate 64-bit offset. */
4717static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
4718 { gen_helper_sve_stbd_zsu,
4719 gen_helper_sve_sthd_zsu,
4720 gen_helper_sve_stsd_zsu,
4721 gen_helper_sve_stdd_zsu, },
4722 { gen_helper_sve_stbd_zss,
4723 gen_helper_sve_sthd_zss,
4724 gen_helper_sve_stsd_zss,
4725 gen_helper_sve_stdd_zss, },
4726 { gen_helper_sve_stbd_zd,
4727 gen_helper_sve_sthd_zd,
4728 gen_helper_sve_stsd_zd,
4729 gen_helper_sve_stdd_zd, },
4730};
4731
4732static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4733{
4734 gen_helper_gvec_mem_scatter *fn;
4735
4736 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4737 return false;
4738 }
4739 if (!sve_access_check(s)) {
4740 return true;
4741 }
4742 switch (a->esz) {
4743 case MO_32:
4744 fn = scatter_store_fn32[a->xs][a->msz];
4745 break;
4746 case MO_64:
4747 fn = scatter_store_fn64[a->xs][a->msz];
4748 break;
4749 default:
4750 g_assert_not_reached();
4751 }
4752 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4753 cpu_reg_sp(s, a->rn), fn);
4754 return true;
4755}
4756
4757static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
4758{
4759 gen_helper_gvec_mem_scatter *fn = NULL;
4760 TCGv_i64 imm;
4761
4762 if (a->esz < a->msz) {
4763 return false;
4764 }
4765 if (!sve_access_check(s)) {
4766 return true;
4767 }
4768
4769 switch (a->esz) {
4770 case MO_32:
4771 fn = scatter_store_fn32[0][a->msz];
4772 break;
4773 case MO_64:
4774 fn = scatter_store_fn64[2][a->msz];
4775 break;
4776 }
4777 assert(fn != NULL);
4778
4779 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
4780 * by loading the immediate into the scalar parameter.
4781 */
4782 imm = tcg_const_i64(a->imm << a->msz);
4783 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4784 tcg_temp_free_i64(imm);
4785 return true;
4786}
4787
4788/*
4789 * Prefetches
4790 */
4791
4792static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
4793{
4794 /* Prefetch is a nop within QEMU. */
4795 sve_access_check(s);
4796 return true;
4797}
4798
4799static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
4800{
4801 if (a->rm == 31) {
4802 return false;
4803 }
4804 /* Prefetch is a nop within QEMU. */
4805 sve_access_check(s);
4806 return true;
4807}