]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
target/arm: Fix sign of sve_cmpeq_ppzw/sve_cmpne_ppzw
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg-op.h"
24#include "tcg-op-gvec.h"
028e2a7b 25#include "tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
9ee3a611
RH
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
38cadeba
RH
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
757f9cff
RH
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
c4e7c493 46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    /* Drop imm3; the remaining tsz bits determine the element size. */
    int tsz = x >> 3;
    return 31 - clz32(tsz);
}
62
/* Decode the shift amount for a right shift from the tszimm field. */
static int tszimm_shr(int x)
{
    int esz = tszimm_esz(x);
    return (16 << esz) - x;
}
67
/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    int esz = tszimm_esz(x);
    return x - (8 << esz);
}
73
24e82e68
RH
/* Decoder helper: map a field to its value plus one. */
static inline int plus1(int x)
{
    return 1 + x;
}
78
f25a2361
RH
/* The SH bit is in bit 8.  Extract the low 8 bits, sign-extend,
 * and shift left by 8 when SH is set.
 */
static inline int expand_imm_sh8s(int x)
{
    int shift = (x & 0x100) ? 8 : 0;
    return (int8_t)x << shift;
}
84
6e6a157d
RH
/* As above, but the low 8 bits are treated as unsigned. */
static inline int expand_imm_sh8u(int x)
{
    int shift = (x & 0x100) ? 8 : 0;
    return (uint8_t)x << shift;
}
89
c4e7c493
RH
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t msz_to_dtype[4] = { 0, 5, 10, 15 };
    return msz_to_dtype[msz];
}
98
38388f7e
RH
99/*
100 * Include the generated decoder.
101 */
102
103#include "decode-sve.inc.c"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
d1822297
RH
109/* Return the offset info CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112static inline int pred_full_reg_offset(DisasContext *s, int regno)
113{
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115}
116
117/* Return the byte size of the whole predicate register, VL / 64. */
118static inline int pred_full_reg_size(DisasContext *s)
119{
120 return s->sve_len >> 3;
121}
122
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    }
    /* Round up to a multiple of 16 bytes. */
    return (size + 15) & ~15;
}
139
140static int pred_gvec_reg_size(DisasContext *s)
141{
142 return size_for_gvec(pred_full_reg_size(s));
143}
144
39eea561
RH
145/* Invoke a vector expander on two Zregs. */
146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
147 int esz, int rd, int rn)
38388f7e 148{
39eea561
RH
149 if (sve_access_check(s)) {
150 unsigned vsz = vec_full_reg_size(s);
151 gvec_fn(esz, vec_full_reg_offset(s, rd),
152 vec_full_reg_offset(s, rn), vsz, vsz);
153 }
154 return true;
38388f7e
RH
155}
156
39eea561
RH
157/* Invoke a vector expander on three Zregs. */
158static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
159 int esz, int rd, int rn, int rm)
38388f7e 160{
39eea561
RH
161 if (sve_access_check(s)) {
162 unsigned vsz = vec_full_reg_size(s);
163 gvec_fn(esz, vec_full_reg_offset(s, rd),
164 vec_full_reg_offset(s, rn),
165 vec_full_reg_offset(s, rm), vsz, vsz);
166 }
167 return true;
38388f7e
RH
168}
169
39eea561
RH
170/* Invoke a vector move on two Zregs. */
171static bool do_mov_z(DisasContext *s, int rd, int rn)
38388f7e 172{
39eea561 173 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
38388f7e
RH
174}
175
d9d78dcc
RH
176/* Initialize a Zreg with replications of a 64-bit immediate. */
177static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
178{
179 unsigned vsz = vec_full_reg_size(s);
180 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
181}
182
516e246a
RH
183/* Invoke a vector expander on two Pregs. */
184static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
185 int esz, int rd, int rn)
186{
187 if (sve_access_check(s)) {
188 unsigned psz = pred_gvec_reg_size(s);
189 gvec_fn(esz, pred_full_reg_offset(s, rd),
190 pred_full_reg_offset(s, rn), psz, psz);
191 }
192 return true;
193}
194
195/* Invoke a vector expander on three Pregs. */
196static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
197 int esz, int rd, int rn, int rm)
198{
199 if (sve_access_check(s)) {
200 unsigned psz = pred_gvec_reg_size(s);
201 gvec_fn(esz, pred_full_reg_offset(s, rd),
202 pred_full_reg_offset(s, rn),
203 pred_full_reg_offset(s, rm), psz, psz);
204 }
205 return true;
206}
207
208/* Invoke a vector operation on four Pregs. */
209static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
210 int rd, int rn, int rm, int rg)
211{
212 if (sve_access_check(s)) {
213 unsigned psz = pred_gvec_reg_size(s);
214 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
215 pred_full_reg_offset(s, rn),
216 pred_full_reg_offset(s, rm),
217 pred_full_reg_offset(s, rg),
218 psz, psz, gvec_op);
219 }
220 return true;
221}
222
223/* Invoke a vector move on two Pregs. */
224static bool do_mov_p(DisasContext *s, int rd, int rn)
225{
226 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
227}
228
9e18d7a6
RH
229/* Set the cpu flags as per a return from an SVE helper. */
230static void do_pred_flags(TCGv_i32 t)
231{
232 tcg_gen_mov_i32(cpu_NF, t);
233 tcg_gen_andi_i32(cpu_ZF, t, 2);
234 tcg_gen_andi_i32(cpu_CF, t, 1);
235 tcg_gen_movi_i32(cpu_VF, 0);
236}
237
238/* Subroutines computing the ARM PredTest psuedofunction. */
239static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240{
241 TCGv_i32 t = tcg_temp_new_i32();
242
243 gen_helper_sve_predtest1(t, d, g);
244 do_pred_flags(t);
245 tcg_temp_free_i32(t);
246}
247
248static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
249{
250 TCGv_ptr dptr = tcg_temp_new_ptr();
251 TCGv_ptr gptr = tcg_temp_new_ptr();
252 TCGv_i32 t;
253
254 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
255 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
256 t = tcg_const_i32(words);
257
258 gen_helper_sve_predtest(t, dptr, gptr, t);
259 tcg_temp_free_ptr(dptr);
260 tcg_temp_free_ptr(gptr);
261
262 do_pred_flags(t);
263 tcg_temp_free_i32(t);
264}
265
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull,  /* bytes: every bit */
    0x5555555555555555ull,  /* halfwords: every 2nd bit */
    0x1111111111111111ull,  /* words: every 4th bit */
    0x0101010101010101ull   /* doublewords: every 8th bit */
};
271
39eea561
RH
272/*
273 *** SVE Logical - Unpredicated Group
274 */
275
276static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
277{
278 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
279}
280
281static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
282{
283 if (a->rn == a->rm) { /* MOV */
284 return do_mov_z(s, a->rd, a->rn);
285 } else {
286 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
287 }
288}
289
290static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
291{
292 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
293}
294
295static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
38388f7e 296{
39eea561 297 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
38388f7e 298}
d1822297 299
fea98f9c
RH
300/*
301 *** SVE Integer Arithmetic - Unpredicated Group
302 */
303
304static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
305{
306 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
307}
308
309static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
310{
311 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
312}
313
314static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
315{
316 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
317}
318
319static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
320{
321 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
322}
323
324static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
325{
326 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
327}
328
329static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
330{
331 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
332}
333
f97cfd59
RH
334/*
335 *** SVE Integer Arithmetic - Binary Predicated Group
336 */
337
338static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
339{
340 unsigned vsz = vec_full_reg_size(s);
341 if (fn == NULL) {
342 return false;
343 }
344 if (sve_access_check(s)) {
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
346 vec_full_reg_offset(s, a->rn),
347 vec_full_reg_offset(s, a->rm),
348 pred_full_reg_offset(s, a->pg),
349 vsz, vsz, 0, fn);
350 }
351 return true;
352}
353
a2103582
RH
354/* Select active elememnts from Zn and inactive elements from Zm,
355 * storing the result in Zd.
356 */
357static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
358{
359 static gen_helper_gvec_4 * const fns[4] = {
360 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
361 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
362 };
363 unsigned vsz = vec_full_reg_size(s);
364 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
365 vec_full_reg_offset(s, rn),
366 vec_full_reg_offset(s, rm),
367 pred_full_reg_offset(s, pg),
368 vsz, vsz, 0, fns[esz]);
369}
370
f97cfd59
RH
371#define DO_ZPZZ(NAME, name) \
372static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
373 uint32_t insn) \
374{ \
375 static gen_helper_gvec_4 * const fns[4] = { \
376 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
377 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
378 }; \
379 return do_zpzz_ool(s, a, fns[a->esz]); \
380}
381
382DO_ZPZZ(AND, and)
383DO_ZPZZ(EOR, eor)
384DO_ZPZZ(ORR, orr)
385DO_ZPZZ(BIC, bic)
386
387DO_ZPZZ(ADD, add)
388DO_ZPZZ(SUB, sub)
389
390DO_ZPZZ(SMAX, smax)
391DO_ZPZZ(UMAX, umax)
392DO_ZPZZ(SMIN, smin)
393DO_ZPZZ(UMIN, umin)
394DO_ZPZZ(SABD, sabd)
395DO_ZPZZ(UABD, uabd)
396
397DO_ZPZZ(MUL, mul)
398DO_ZPZZ(SMULH, smulh)
399DO_ZPZZ(UMULH, umulh)
400
27721dbb
RH
401DO_ZPZZ(ASR, asr)
402DO_ZPZZ(LSR, lsr)
403DO_ZPZZ(LSL, lsl)
404
f97cfd59
RH
405static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
406{
407 static gen_helper_gvec_4 * const fns[4] = {
408 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
409 };
410 return do_zpzz_ool(s, a, fns[a->esz]);
411}
412
413static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
414{
415 static gen_helper_gvec_4 * const fns[4] = {
416 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
417 };
418 return do_zpzz_ool(s, a, fns[a->esz]);
419}
420
a2103582
RH
421static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
422{
423 if (sve_access_check(s)) {
424 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
425 }
426 return true;
427}
d3fe4a29 428
f97cfd59
RH
429#undef DO_ZPZZ
430
afac6d04
RH
431/*
432 *** SVE Integer Arithmetic - Unary Predicated Group
433 */
434
435static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
436{
437 if (fn == NULL) {
438 return false;
439 }
440 if (sve_access_check(s)) {
441 unsigned vsz = vec_full_reg_size(s);
442 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
443 vec_full_reg_offset(s, a->rn),
444 pred_full_reg_offset(s, a->pg),
445 vsz, vsz, 0, fn);
446 }
447 return true;
448}
449
450#define DO_ZPZ(NAME, name) \
451static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
452{ \
453 static gen_helper_gvec_3 * const fns[4] = { \
454 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
455 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
456 }; \
457 return do_zpz_ool(s, a, fns[a->esz]); \
458}
459
460DO_ZPZ(CLS, cls)
461DO_ZPZ(CLZ, clz)
462DO_ZPZ(CNT_zpz, cnt_zpz)
463DO_ZPZ(CNOT, cnot)
464DO_ZPZ(NOT_zpz, not_zpz)
465DO_ZPZ(ABS, abs)
466DO_ZPZ(NEG, neg)
467
468static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
469{
470 static gen_helper_gvec_3 * const fns[4] = {
471 NULL,
472 gen_helper_sve_fabs_h,
473 gen_helper_sve_fabs_s,
474 gen_helper_sve_fabs_d
475 };
476 return do_zpz_ool(s, a, fns[a->esz]);
477}
478
479static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
480{
481 static gen_helper_gvec_3 * const fns[4] = {
482 NULL,
483 gen_helper_sve_fneg_h,
484 gen_helper_sve_fneg_s,
485 gen_helper_sve_fneg_d
486 };
487 return do_zpz_ool(s, a, fns[a->esz]);
488}
489
490static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
491{
492 static gen_helper_gvec_3 * const fns[4] = {
493 NULL,
494 gen_helper_sve_sxtb_h,
495 gen_helper_sve_sxtb_s,
496 gen_helper_sve_sxtb_d
497 };
498 return do_zpz_ool(s, a, fns[a->esz]);
499}
500
501static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
502{
503 static gen_helper_gvec_3 * const fns[4] = {
504 NULL,
505 gen_helper_sve_uxtb_h,
506 gen_helper_sve_uxtb_s,
507 gen_helper_sve_uxtb_d
508 };
509 return do_zpz_ool(s, a, fns[a->esz]);
510}
511
512static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
513{
514 static gen_helper_gvec_3 * const fns[4] = {
515 NULL, NULL,
516 gen_helper_sve_sxth_s,
517 gen_helper_sve_sxth_d
518 };
519 return do_zpz_ool(s, a, fns[a->esz]);
520}
521
522static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
523{
524 static gen_helper_gvec_3 * const fns[4] = {
525 NULL, NULL,
526 gen_helper_sve_uxth_s,
527 gen_helper_sve_uxth_d
528 };
529 return do_zpz_ool(s, a, fns[a->esz]);
530}
531
532static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
533{
534 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
535}
536
537static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
538{
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
540}
541
542#undef DO_ZPZ
543
047cec97
RH
544/*
545 *** SVE Integer Reduction Group
546 */
547
548typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
549static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
550 gen_helper_gvec_reduc *fn)
551{
552 unsigned vsz = vec_full_reg_size(s);
553 TCGv_ptr t_zn, t_pg;
554 TCGv_i32 desc;
555 TCGv_i64 temp;
556
557 if (fn == NULL) {
558 return false;
559 }
560 if (!sve_access_check(s)) {
561 return true;
562 }
563
564 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
565 temp = tcg_temp_new_i64();
566 t_zn = tcg_temp_new_ptr();
567 t_pg = tcg_temp_new_ptr();
568
569 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
570 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
571 fn(temp, t_zn, t_pg, desc);
572 tcg_temp_free_ptr(t_zn);
573 tcg_temp_free_ptr(t_pg);
574 tcg_temp_free_i32(desc);
575
576 write_fp_dreg(s, a->rd, temp);
577 tcg_temp_free_i64(temp);
578 return true;
579}
580
581#define DO_VPZ(NAME, name) \
582static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
583{ \
584 static gen_helper_gvec_reduc * const fns[4] = { \
585 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
586 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
587 }; \
588 return do_vpz_ool(s, a, fns[a->esz]); \
589}
590
591DO_VPZ(ORV, orv)
592DO_VPZ(ANDV, andv)
593DO_VPZ(EORV, eorv)
594
595DO_VPZ(UADDV, uaddv)
596DO_VPZ(SMAXV, smaxv)
597DO_VPZ(UMAXV, umaxv)
598DO_VPZ(SMINV, sminv)
599DO_VPZ(UMINV, uminv)
600
601static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
602{
603 static gen_helper_gvec_reduc * const fns[4] = {
604 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
605 gen_helper_sve_saddv_s, NULL
606 };
607 return do_vpz_ool(s, a, fns[a->esz]);
608}
609
610#undef DO_VPZ
611
ccd841c3
RH
612/*
613 *** SVE Shift by Immediate - Predicated Group
614 */
615
616/* Store zero into every active element of Zd. We will use this for two
617 * and three-operand predicated instructions for which logic dictates a
618 * zero result.
619 */
620static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
621{
622 static gen_helper_gvec_2 * const fns[4] = {
623 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
624 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
625 };
626 if (sve_access_check(s)) {
627 unsigned vsz = vec_full_reg_size(s);
628 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
629 pred_full_reg_offset(s, pg),
630 vsz, vsz, 0, fns[esz]);
631 }
632 return true;
633}
634
68459864
RH
635/* Copy Zn into Zd, storing zeros into inactive elements. */
636static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
637{
638 static gen_helper_gvec_3 * const fns[4] = {
639 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
640 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
641 };
642 unsigned vsz = vec_full_reg_size(s);
643 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
644 vec_full_reg_offset(s, rn),
645 pred_full_reg_offset(s, pg),
646 vsz, vsz, 0, fns[esz]);
647}
648
ccd841c3
RH
649static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
650 gen_helper_gvec_3 *fn)
651{
652 if (sve_access_check(s)) {
653 unsigned vsz = vec_full_reg_size(s);
654 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
655 vec_full_reg_offset(s, a->rn),
656 pred_full_reg_offset(s, a->pg),
657 vsz, vsz, a->imm, fn);
658 }
659 return true;
660}
661
662static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
663{
664 static gen_helper_gvec_3 * const fns[4] = {
665 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
666 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
667 };
668 if (a->esz < 0) {
669 /* Invalid tsz encoding -- see tszimm_esz. */
670 return false;
671 }
672 /* Shift by element size is architecturally valid. For
673 arithmetic right-shift, it's the same as by one less. */
674 a->imm = MIN(a->imm, (8 << a->esz) - 1);
675 return do_zpzi_ool(s, a, fns[a->esz]);
676}
677
678static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
679{
680 static gen_helper_gvec_3 * const fns[4] = {
681 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
682 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
683 };
684 if (a->esz < 0) {
685 return false;
686 }
687 /* Shift by element size is architecturally valid.
688 For logical shifts, it is a zeroing operation. */
689 if (a->imm >= (8 << a->esz)) {
690 return do_clr_zp(s, a->rd, a->pg, a->esz);
691 } else {
692 return do_zpzi_ool(s, a, fns[a->esz]);
693 }
694}
695
696static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
697{
698 static gen_helper_gvec_3 * const fns[4] = {
699 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
700 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
701 };
702 if (a->esz < 0) {
703 return false;
704 }
705 /* Shift by element size is architecturally valid.
706 For logical shifts, it is a zeroing operation. */
707 if (a->imm >= (8 << a->esz)) {
708 return do_clr_zp(s, a->rd, a->pg, a->esz);
709 } else {
710 return do_zpzi_ool(s, a, fns[a->esz]);
711 }
712}
713
714static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
715{
716 static gen_helper_gvec_3 * const fns[4] = {
717 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
718 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
719 };
720 if (a->esz < 0) {
721 return false;
722 }
723 /* Shift by element size is architecturally valid. For arithmetic
724 right shift for division, it is a zeroing operation. */
725 if (a->imm >= (8 << a->esz)) {
726 return do_clr_zp(s, a->rd, a->pg, a->esz);
727 } else {
728 return do_zpzi_ool(s, a, fns[a->esz]);
729 }
730}
731
fe7f8dfb
RH
732/*
733 *** SVE Bitwise Shift - Predicated Group
734 */
735
736#define DO_ZPZW(NAME, name) \
737static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
738 uint32_t insn) \
739{ \
740 static gen_helper_gvec_4 * const fns[3] = { \
741 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
742 gen_helper_sve_##name##_zpzw_s, \
743 }; \
744 if (a->esz < 0 || a->esz >= 3) { \
745 return false; \
746 } \
747 return do_zpzz_ool(s, a, fns[a->esz]); \
748}
749
750DO_ZPZW(ASR, asr)
751DO_ZPZW(LSR, lsr)
752DO_ZPZW(LSL, lsl)
753
754#undef DO_ZPZW
755
d9d78dcc
RH
756/*
757 *** SVE Bitwise Shift - Unpredicated Group
758 */
759
760static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
761 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
762 int64_t, uint32_t, uint32_t))
763{
764 if (a->esz < 0) {
765 /* Invalid tsz encoding -- see tszimm_esz. */
766 return false;
767 }
768 if (sve_access_check(s)) {
769 unsigned vsz = vec_full_reg_size(s);
770 /* Shift by element size is architecturally valid. For
771 arithmetic right-shift, it's the same as by one less.
772 Otherwise it is a zeroing operation. */
773 if (a->imm >= 8 << a->esz) {
774 if (asr) {
775 a->imm = (8 << a->esz) - 1;
776 } else {
777 do_dupi_z(s, a->rd, 0);
778 return true;
779 }
780 }
781 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
782 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
783 }
784 return true;
785}
786
787static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
788{
789 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
790}
791
792static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
793{
794 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
795}
796
797static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
798{
799 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
800}
801
802static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
803{
804 if (fn == NULL) {
805 return false;
806 }
807 if (sve_access_check(s)) {
808 unsigned vsz = vec_full_reg_size(s);
809 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
810 vec_full_reg_offset(s, a->rn),
811 vec_full_reg_offset(s, a->rm),
812 vsz, vsz, 0, fn);
813 }
814 return true;
815}
816
817#define DO_ZZW(NAME, name) \
818static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
819 uint32_t insn) \
820{ \
821 static gen_helper_gvec_3 * const fns[4] = { \
822 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
823 gen_helper_sve_##name##_zzw_s, NULL \
824 }; \
825 return do_zzw_ool(s, a, fns[a->esz]); \
826}
827
828DO_ZZW(ASR, asr)
829DO_ZZW(LSR, lsr)
830DO_ZZW(LSL, lsl)
831
832#undef DO_ZZW
833
96a36e4a
RH
834/*
835 *** SVE Integer Multiply-Add Group
836 */
837
838static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
839 gen_helper_gvec_5 *fn)
840{
841 if (sve_access_check(s)) {
842 unsigned vsz = vec_full_reg_size(s);
843 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
844 vec_full_reg_offset(s, a->ra),
845 vec_full_reg_offset(s, a->rn),
846 vec_full_reg_offset(s, a->rm),
847 pred_full_reg_offset(s, a->pg),
848 vsz, vsz, 0, fn);
849 }
850 return true;
851}
852
853#define DO_ZPZZZ(NAME, name) \
854static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
855{ \
856 static gen_helper_gvec_5 * const fns[4] = { \
857 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
858 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
859 }; \
860 return do_zpzzz_ool(s, a, fns[a->esz]); \
861}
862
863DO_ZPZZZ(MLA, mla)
864DO_ZPZZZ(MLS, mls)
865
866#undef DO_ZPZZZ
867
9a56c9c3
RH
868/*
869 *** SVE Index Generation Group
870 */
871
872static void do_index(DisasContext *s, int esz, int rd,
873 TCGv_i64 start, TCGv_i64 incr)
874{
875 unsigned vsz = vec_full_reg_size(s);
876 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
877 TCGv_ptr t_zd = tcg_temp_new_ptr();
878
879 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
880 if (esz == 3) {
881 gen_helper_sve_index_d(t_zd, start, incr, desc);
882 } else {
883 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
884 static index_fn * const fns[3] = {
885 gen_helper_sve_index_b,
886 gen_helper_sve_index_h,
887 gen_helper_sve_index_s,
888 };
889 TCGv_i32 s32 = tcg_temp_new_i32();
890 TCGv_i32 i32 = tcg_temp_new_i32();
891
892 tcg_gen_extrl_i64_i32(s32, start);
893 tcg_gen_extrl_i64_i32(i32, incr);
894 fns[esz](t_zd, s32, i32, desc);
895
896 tcg_temp_free_i32(s32);
897 tcg_temp_free_i32(i32);
898 }
899 tcg_temp_free_ptr(t_zd);
900 tcg_temp_free_i32(desc);
901}
902
903static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
904{
905 if (sve_access_check(s)) {
906 TCGv_i64 start = tcg_const_i64(a->imm1);
907 TCGv_i64 incr = tcg_const_i64(a->imm2);
908 do_index(s, a->esz, a->rd, start, incr);
909 tcg_temp_free_i64(start);
910 tcg_temp_free_i64(incr);
911 }
912 return true;
913}
914
915static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
916{
917 if (sve_access_check(s)) {
918 TCGv_i64 start = tcg_const_i64(a->imm);
919 TCGv_i64 incr = cpu_reg(s, a->rm);
920 do_index(s, a->esz, a->rd, start, incr);
921 tcg_temp_free_i64(start);
922 }
923 return true;
924}
925
926static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
927{
928 if (sve_access_check(s)) {
929 TCGv_i64 start = cpu_reg(s, a->rn);
930 TCGv_i64 incr = tcg_const_i64(a->imm);
931 do_index(s, a->esz, a->rd, start, incr);
932 tcg_temp_free_i64(incr);
933 }
934 return true;
935}
936
937static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
938{
939 if (sve_access_check(s)) {
940 TCGv_i64 start = cpu_reg(s, a->rn);
941 TCGv_i64 incr = cpu_reg(s, a->rm);
942 do_index(s, a->esz, a->rd, start, incr);
943 }
944 return true;
945}
946
96f922cc
RH
947/*
948 *** SVE Stack Allocation Group
949 */
950
951static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
952{
953 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
954 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
955 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
956 return true;
957}
958
959static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
960{
961 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
962 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
963 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
964 return true;
965}
966
967static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
968{
969 TCGv_i64 reg = cpu_reg(s, a->rd);
970 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
971 return true;
972}
973
4b242d9c
RH
974/*
975 *** SVE Compute Vector Address Group
976 */
977
978static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
979{
980 if (sve_access_check(s)) {
981 unsigned vsz = vec_full_reg_size(s);
982 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
983 vec_full_reg_offset(s, a->rn),
984 vec_full_reg_offset(s, a->rm),
985 vsz, vsz, a->imm, fn);
986 }
987 return true;
988}
989
990static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
991{
992 return do_adr(s, a, gen_helper_sve_adr_p32);
993}
994
995static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
996{
997 return do_adr(s, a, gen_helper_sve_adr_p64);
998}
999
1000static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001{
1002 return do_adr(s, a, gen_helper_sve_adr_s32);
1003}
1004
1005static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006{
1007 return do_adr(s, a, gen_helper_sve_adr_u32);
1008}
1009
0762cd42
RH
1010/*
1011 *** SVE Integer Misc - Unpredicated Group
1012 */
1013
1014static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015{
1016 static gen_helper_gvec_2 * const fns[4] = {
1017 NULL,
1018 gen_helper_sve_fexpa_h,
1019 gen_helper_sve_fexpa_s,
1020 gen_helper_sve_fexpa_d,
1021 };
1022 if (a->esz == 0) {
1023 return false;
1024 }
1025 if (sve_access_check(s)) {
1026 unsigned vsz = vec_full_reg_size(s);
1027 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028 vec_full_reg_offset(s, a->rn),
1029 vsz, vsz, 0, fns[a->esz]);
1030 }
1031 return true;
1032}
1033
a1f233f2
RH
1034static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035{
1036 static gen_helper_gvec_3 * const fns[4] = {
1037 NULL,
1038 gen_helper_sve_ftssel_h,
1039 gen_helper_sve_ftssel_s,
1040 gen_helper_sve_ftssel_d,
1041 };
1042 if (a->esz == 0) {
1043 return false;
1044 }
1045 if (sve_access_check(s)) {
1046 unsigned vsz = vec_full_reg_size(s);
1047 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048 vec_full_reg_offset(s, a->rn),
1049 vec_full_reg_offset(s, a->rm),
1050 vsz, vsz, 0, fns[a->esz]);
1051 }
1052 return true;
1053}
1054
516e246a
RH
1055/*
1056 *** SVE Predicate Logical Operations Group
1057 */
1058
1059static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1060 const GVecGen4 *gvec_op)
1061{
1062 if (!sve_access_check(s)) {
1063 return true;
1064 }
1065
1066 unsigned psz = pred_gvec_reg_size(s);
1067 int dofs = pred_full_reg_offset(s, a->rd);
1068 int nofs = pred_full_reg_offset(s, a->rn);
1069 int mofs = pred_full_reg_offset(s, a->rm);
1070 int gofs = pred_full_reg_offset(s, a->pg);
1071
1072 if (psz == 8) {
1073 /* Do the operation and the flags generation in temps. */
1074 TCGv_i64 pd = tcg_temp_new_i64();
1075 TCGv_i64 pn = tcg_temp_new_i64();
1076 TCGv_i64 pm = tcg_temp_new_i64();
1077 TCGv_i64 pg = tcg_temp_new_i64();
1078
1079 tcg_gen_ld_i64(pn, cpu_env, nofs);
1080 tcg_gen_ld_i64(pm, cpu_env, mofs);
1081 tcg_gen_ld_i64(pg, cpu_env, gofs);
1082
1083 gvec_op->fni8(pd, pn, pm, pg);
1084 tcg_gen_st_i64(pd, cpu_env, dofs);
1085
1086 do_predtest1(pd, pg);
1087
1088 tcg_temp_free_i64(pd);
1089 tcg_temp_free_i64(pn);
1090 tcg_temp_free_i64(pm);
1091 tcg_temp_free_i64(pg);
1092 } else {
1093 /* The operation and flags generation is large. The computation
1094 * of the flags depends on the original contents of the guarding
1095 * predicate. If the destination overwrites the guarding predicate,
1096 * then the easiest way to get this right is to save a copy.
1097 */
1098 int tofs = gofs;
1099 if (a->rd == a->pg) {
1100 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1101 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1102 }
1103
1104 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1105 do_predtest(s, dofs, tofs, psz / 8);
1106 }
1107 return true;
1108}
1109
/* pd = pn & pm & pg, one 64-bit predicate word at a time. */
1110static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111{
1112 tcg_gen_and_i64(pd, pn, pm);
1113 tcg_gen_and_i64(pd, pd, pg);
1114}
1115
/* Vector expansion of the same operation. */
1116static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117 TCGv_vec pm, TCGv_vec pg)
1118{
1119 tcg_gen_and_vec(vece, pd, pn, pm);
1120 tcg_gen_and_vec(vece, pd, pd, pg);
1121}
1122
/* AND (predicates): pd = pg & pn & pm, optionally setting flags.
 * Several operand aliases reduce to simpler two- or three-operand moves.
 */
1123static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124{
1125 static const GVecGen4 op = {
1126 .fni8 = gen_and_pg_i64,
1127 .fniv = gen_and_pg_vec,
1128 .fno = gen_helper_sve_and_pppp,
1129 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130 };
1131 if (a->s) {
1132 return do_pppp_flags(s, a, &op);
1133 } else if (a->rn == a->rm) {
/* pn & pn == pn, so this is either a move or pn & pg. */
1134 if (a->pg == a->rn) {
1135 return do_mov_p(s, a->rd, a->rn);
1136 } else {
1137 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138 }
1139 } else if (a->pg == a->rn || a->pg == a->rm) {
/* pg duplicates one operand; a single AND of the two distinct regs. */
1140 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141 } else {
1142 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143 }
1144}
1145
/* pd = (pn & ~pm) & pg, one 64-bit predicate word at a time. */
1146static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147{
1148 tcg_gen_andc_i64(pd, pn, pm);
1149 tcg_gen_and_i64(pd, pd, pg);
1150}
1151
/* Vector expansion of the same operation. */
1152static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153 TCGv_vec pm, TCGv_vec pg)
1154{
1155 tcg_gen_andc_vec(vece, pd, pn, pm);
1156 tcg_gen_and_vec(vece, pd, pd, pg);
1157}
1158
/* BIC (predicates): pd = pg & pn & ~pm, optionally setting flags. */
1159static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160{
1161 static const GVecGen4 op = {
1162 .fni8 = gen_bic_pg_i64,
1163 .fniv = gen_bic_pg_vec,
1164 .fno = gen_helper_sve_bic_pppp,
1165 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166 };
1167 if (a->s) {
1168 return do_pppp_flags(s, a, &op);
1169 } else if (a->pg == a->rn) {
/* pg == pn: (pn & ~pm) & pn == pn & ~pm, a plain ANDC. */
1170 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171 } else {
1172 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173 }
1174}
1175
/* pd = (pn ^ pm) & pg, one 64-bit predicate word at a time. */
1176static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177{
1178 tcg_gen_xor_i64(pd, pn, pm);
1179 tcg_gen_and_i64(pd, pd, pg);
1180}
1181
/* Vector expansion of the same operation. */
1182static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183 TCGv_vec pm, TCGv_vec pg)
1184{
1185 tcg_gen_xor_vec(vece, pd, pn, pm);
1186 tcg_gen_and_vec(vece, pd, pd, pg);
1187}
1188
/* EOR (predicates): pd = pg & (pn ^ pm), optionally setting flags. */
1189static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190{
1191 static const GVecGen4 op = {
1192 .fni8 = gen_eor_pg_i64,
1193 .fniv = gen_eor_pg_vec,
1194 .fno = gen_helper_sve_eor_pppp,
1195 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196 };
1197 if (a->s) {
1198 return do_pppp_flags(s, a, &op);
1199 } else {
1200 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201 }
1202}
1203
/* pd = (pn & pg) | (pm & ~pg) -- predicate select.
 * NOTE: clobbers pn and pm as scratch; callers pass temporaries.
 */
1204static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205{
1206 tcg_gen_and_i64(pn, pn, pg);
1207 tcg_gen_andc_i64(pm, pm, pg);
1208 tcg_gen_or_i64(pd, pn, pm);
1209}
1210
/* Vector expansion of the same operation; likewise clobbers pn/pm. */
1211static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212 TCGv_vec pm, TCGv_vec pg)
1213{
1214 tcg_gen_and_vec(vece, pn, pn, pg);
1215 tcg_gen_andc_vec(vece, pm, pm, pg);
1216 tcg_gen_or_vec(vece, pd, pn, pm);
1217}
1218
/* SEL (predicates): per-bit select between pn and pm under pg.
 * There is no flag-setting form: S == 1 is an invalid encoding.
 */
1219static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220{
1221 static const GVecGen4 op = {
1222 .fni8 = gen_sel_pg_i64,
1223 .fniv = gen_sel_pg_vec,
1224 .fno = gen_helper_sve_sel_pppp,
1225 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226 };
1227 if (a->s) {
1228 return false;
1229 } else {
1230 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231 }
1232}
1233
/* pd = (pn | pm) & pg, one 64-bit predicate word at a time. */
1234static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235{
1236 tcg_gen_or_i64(pd, pn, pm);
1237 tcg_gen_and_i64(pd, pd, pg);
1238}
1239
/* Vector expansion of the same operation. */
1240static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241 TCGv_vec pm, TCGv_vec pg)
1242{
1243 tcg_gen_or_vec(vece, pd, pn, pm);
1244 tcg_gen_and_vec(vece, pd, pd, pg);
1245}
1246
/* ORR (predicates): pd = pg & (pn | pm), optionally setting flags. */
1247static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248{
1249 static const GVecGen4 op = {
1250 .fni8 = gen_orr_pg_i64,
1251 .fniv = gen_orr_pg_vec,
1252 .fno = gen_helper_sve_orr_pppp,
1253 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254 };
1255 if (a->s) {
1256 return do_pppp_flags(s, a, &op);
1257 } else if (a->pg == a->rn && a->rn == a->rm) {
/* All three sources alias: (pn | pn) & pn == pn, a plain move. */
1258 return do_mov_p(s, a->rd, a->rn);
1259 } else {
1260 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261 }
1262}
1263
/* pd = (pn | ~pm) & pg, one 64-bit predicate word at a time. */
1264static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1265{
1266 tcg_gen_orc_i64(pd, pn, pm);
1267 tcg_gen_and_i64(pd, pd, pg);
1268}
1269
/* Vector expansion of the same operation. */
1270static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271 TCGv_vec pm, TCGv_vec pg)
1272{
1273 tcg_gen_orc_vec(vece, pd, pn, pm);
1274 tcg_gen_and_vec(vece, pd, pd, pg);
1275}
1276
/* ORN (predicates): pd = pg & (pn | ~pm), optionally setting flags. */
1277static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278{
1279 static const GVecGen4 op = {
1280 .fni8 = gen_orn_pg_i64,
1281 .fniv = gen_orn_pg_vec,
1282 .fno = gen_helper_sve_orn_pppp,
1283 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284 };
1285 if (a->s) {
1286 return do_pppp_flags(s, a, &op);
1287 } else {
1288 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289 }
1290}
1291
/* pd = pg & ~(pn | pm), one 64-bit predicate word at a time. */
1292static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293{
1294 tcg_gen_or_i64(pd, pn, pm);
1295 tcg_gen_andc_i64(pd, pg, pd);
1296}
1297
/* Vector expansion of the same operation. */
1298static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299 TCGv_vec pm, TCGv_vec pg)
1300{
1301 tcg_gen_or_vec(vece, pd, pn, pm);
1302 tcg_gen_andc_vec(vece, pd, pg, pd);
1303}
1304
/* NOR (predicates): pd = pg & ~(pn | pm), optionally setting flags. */
1305static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306{
1307 static const GVecGen4 op = {
1308 .fni8 = gen_nor_pg_i64,
1309 .fniv = gen_nor_pg_vec,
1310 .fno = gen_helper_sve_nor_pppp,
1311 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312 };
1313 if (a->s) {
1314 return do_pppp_flags(s, a, &op);
1315 } else {
1316 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317 }
1318}
1319
/* pd = pg & ~(pn & pm), one 64-bit predicate word at a time. */
1320static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1321{
1322 tcg_gen_and_i64(pd, pn, pm);
1323 tcg_gen_andc_i64(pd, pg, pd);
1324}
1325
/* Vector expansion of the same operation. */
1326static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1327 TCGv_vec pm, TCGv_vec pg)
1328{
1329 tcg_gen_and_vec(vece, pd, pn, pm);
1330 tcg_gen_andc_vec(vece, pd, pg, pd);
1331}
1332
/* NAND (predicates): pd = pg & ~(pn & pm), optionally setting flags. */
1333static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334{
1335 static const GVecGen4 op = {
1336 .fni8 = gen_nand_pg_i64,
1337 .fniv = gen_nand_pg_vec,
1338 .fno = gen_helper_sve_nand_pppp,
1339 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340 };
1341 if (a->s) {
1342 return do_pppp_flags(s, a, &op);
1343 } else {
1344 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345 }
1346}
1347
9e18d7a6
RH
1348/*
1349 *** SVE Predicate Misc Group
1350 */
1351
/* PTEST: set NZCV from a predicate test of PN under governing PG.
 * Small vectors (one 64-bit predicate word) get an inline expansion;
 * larger ones go through the general do_predtest path.
 */
1352static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353{
1354 if (sve_access_check(s)) {
1355 int nofs = pred_full_reg_offset(s, a->rn);
1356 int gofs = pred_full_reg_offset(s, a->pg);
1357 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359 if (words == 1) {
1360 TCGv_i64 pn = tcg_temp_new_i64();
1361 TCGv_i64 pg = tcg_temp_new_i64();
1362
1363 tcg_gen_ld_i64(pn, cpu_env, nofs);
1364 tcg_gen_ld_i64(pg, cpu_env, gofs);
1365 do_predtest1(pn, pg);
1366
1367 tcg_temp_free_i64(pn);
1368 tcg_temp_free_i64(pg);
1369 } else {
1370 do_predtest(s, nofs, gofs, words);
1371 }
1372 }
1373 return true;
1374}
1375
028e2a7b
RH
/* See the ARM pseudocode DecodePredCount.
 *
 * Map a predicate-constraint PATTERN to the number of active elements
 * for a vector of FULLSZ bytes with element size log2 ESZ.  Patterns
 * that do not fit the vector length, and the reserved #uimm5 encodings,
 * yield zero elements.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: largest power of two not exceeding the element count. */
        return pow2floor(elements);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the requested count. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256. */
        bound = 16 << (pattern - 0x9);
    } else if (pattern == 0x1d) {
        /* MUL4: round down to a multiple of 4. */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {
        /* MUL3: round down to a multiple of 3. */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {
        /* ALL: every element. */
        return elements;
    } else {
        /* #uimm5: reserved encodings select no elements. */
        return 0;
    }

    /* Fixed VLn requests apply only if the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1413
1414/* This handles all of the predicate initialization instructions,
1415 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1416 * so that decode_pred_count returns 0. For SETFFR, we will have
1417 * set RD == 16 == FFR.
1418 */
1419static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1420{
1421 if (!sve_access_check(s)) {
1422 return true;
1423 }
1424
1425 unsigned fullsz = vec_full_reg_size(s);
1426 unsigned ofs = pred_full_reg_offset(s, rd);
1427 unsigned numelem, setsz, i;
1428 uint64_t word, lastword;
1429 TCGv_i64 t;
1430
1431 numelem = decode_pred_count(fullsz, pat, esz);
1432
1433 /* Determine what we must store into each bit, and how many. */
1434 if (numelem == 0) {
1435 lastword = word = 0;
1436 setsz = fullsz;
1437 } else {
1438 setsz = numelem << esz;
1439 lastword = word = pred_esz_masks[esz];
1440 if (setsz % 64) {
/* Trim the final, partially-set 64-bit word to the active bits. */
973558a3 1441 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
028e2a7b
RH
1442 }
1443 }
1444
1445 t = tcg_temp_new_i64();
1446 if (fullsz <= 64) {
/* The whole predicate fits in one word: a single store suffices. */
1447 tcg_gen_movi_i64(t, lastword);
1448 tcg_gen_st_i64(t, cpu_env, ofs);
1449 goto done;
1450 }
1451
1452 if (word == lastword) {
/* Every word is identical; use a gvec splat when the set region
 * rounds to a size the gvec expander can handle exactly. */
1453 unsigned maxsz = size_for_gvec(fullsz / 8);
1454 unsigned oprsz = size_for_gvec(setsz / 8);
1455
1456 if (oprsz * 8 == setsz) {
1457 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1458 goto done;
1459 }
028e2a7b
RH
1460 }
1461
/* General case: store full words, then the trimmed last word,
 * then zero the remainder of the predicate register. */
1462 setsz /= 8;
1463 fullsz /= 8;
1464
1465 tcg_gen_movi_i64(t, word);
973558a3 1466 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
028e2a7b
RH
1467 tcg_gen_st_i64(t, cpu_env, ofs + i);
1468 }
1469 if (lastword != word) {
1470 tcg_gen_movi_i64(t, lastword);
1471 tcg_gen_st_i64(t, cpu_env, ofs + i);
1472 i += 8;
1473 }
1474 if (i < fullsz) {
1475 tcg_gen_movi_i64(t, 0);
1476 for (; i < fullsz; i += 8) {
1477 tcg_gen_st_i64(t, cpu_env, ofs + i);
1478 }
1479 }
1480
1481 done:
1482 tcg_temp_free_i64(t);
1483
1484 /* PTRUES */
/* The result is a translate-time constant, so NZCV can be set
 * with immediates: !Z if any element set, C if none set. */
1485 if (setflag) {
1486 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1487 tcg_gen_movi_i32(cpu_CF, word == 0);
1488 tcg_gen_movi_i32(cpu_VF, 0);
1489 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1490 }
1491 return true;
1492}
1493
/* PTRUE / PTRUES: initialize a predicate from a count pattern. */
1494static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1495{
1496 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1497}
1498
/* SETFFR: set every bit of the first-fault register. */
1499static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1500{
1501 /* Note pat == 31 is #all, to set all elements. */
1502 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1503}
1504
/* PFALSE: clear every bit of the destination predicate. */
1505static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1506{
1507 /* Note pat == 32 is #unimp, to set no elements. */
1508 return do_predset(s, 0, a->rd, 32, false);
1509}
1510
/* RDFFR (predicated): read FFR under a governing predicate. */
1511static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1512{
1513 /* The path through do_pppp_flags is complicated enough to want to avoid
1514 * duplication. Frob the arguments into the form of a predicated AND.
1515 */
1516 arg_rprr_s alt_a = {
1517 .rd = a->rd, .pg = a->pg, .s = a->s,
1518 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1519 };
1520 return trans_AND_pppp(s, &alt_a, insn);
1521}
1522
/* RDFFR (unpredicated): plain copy out of the FFR. */
1523static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1524{
1525 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1526}
1527
/* WRFFR: plain copy into the FFR. */
1528static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1529{
1530 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1531}
1532
/* Common expansion for PFIRST and PNEXT: call the out-of-line helper
 * with a hand-built descriptor (predicate sizes are too small for
 * simd_desc), then copy the helper's returned flags into NZCV.
 */
1533static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1534 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1535 TCGv_ptr, TCGv_i32))
1536{
1537 if (!sve_access_check(s)) {
1538 return true;
1539 }
1540
1541 TCGv_ptr t_pd = tcg_temp_new_ptr();
1542 TCGv_ptr t_pg = tcg_temp_new_ptr();
1543 TCGv_i32 t;
1544 unsigned desc;
1545
/* Descriptor: predicate size in words, plus element size in the data field. */
1546 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1547 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1548
1549 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1550 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1551 t = tcg_const_i32(desc);
1552
/* t doubles as descriptor input and flags output. */
1553 gen_fn(t, t_pd, t_pg, t);
1554 tcg_temp_free_ptr(t_pd);
1555 tcg_temp_free_ptr(t_pg);
1556
1557 do_pred_flags(t);
1558 tcg_temp_free_i32(t);
1559 return true;
1560}
1561
/* PFIRST: set the first active element of PD, setting flags. */
1562static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1563{
1564 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1565}
1566
/* PNEXT: advance to the next active element of PD, setting flags. */
1567static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1568{
1569 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1570}
1571
24e82e68
RH
1572/*
1573 *** SVE Element Count Group
1574 */
1575
1576/* Perform an inline saturating addition of a 32-bit value within
1577 * a 64-bit register. The second operand is known to be positive,
1578 * which halves the comparisions we must perform to bound the result.
1579 */
1580static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1581{
1582 int64_t ibound;
1583 TCGv_i64 bound;
1584 TCGCond cond;
1585
1586 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1587 if (u) {
1588 tcg_gen_ext32u_i64(reg, reg);
1589 } else {
1590 tcg_gen_ext32s_i64(reg, reg);
1591 }
1592 if (d) {
/* Subtracting a positive value can only underflow the lower bound. */
1593 tcg_gen_sub_i64(reg, reg, val);
1594 ibound = (u ? 0 : INT32_MIN);
1595 cond = TCG_COND_LT;
1596 } else {
/* Adding a positive value can only overflow the upper bound. */
1597 tcg_gen_add_i64(reg, reg, val);
1598 ibound = (u ? UINT32_MAX : INT32_MAX);
1599 cond = TCG_COND_GT;
1600 }
/* Clamp: if the 64-bit result passed the bound, replace it. */
1601 bound = tcg_const_i64(ibound);
1602 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1603 tcg_temp_free_i64(bound);
1604}
1605
1606/* Similarly with 64-bit values. */
/* Saturating add/sub of a positive VAL to/from REG at full 64-bit width.
 * U selects unsigned saturation, D selects subtraction.
 */
1607static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1608{
1609 TCGv_i64 t0 = tcg_temp_new_i64();
1610 TCGv_i64 t1 = tcg_temp_new_i64();
1611 TCGv_i64 t2;
1612
1613 if (u) {
1614 if (d) {
/* Unsigned sub: clamp to 0 when reg < val would underflow. */
1615 tcg_gen_sub_i64(t0, reg, val);
1616 tcg_gen_movi_i64(t1, 0);
1617 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1618 } else {
/* Unsigned add: clamp to all-ones when the sum wrapped below reg. */
1619 tcg_gen_add_i64(t0, reg, val);
1620 tcg_gen_movi_i64(t1, -1);
1621 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1622 }
1623 } else {
1624 if (d) {
1625 /* Detect signed overflow for subtraction. */
/* Overflow iff operands differ in sign and the result's sign
 * differs from reg's: (reg ^ val) & (reg ^ (reg - val)) < 0. */
1626 tcg_gen_xor_i64(t0, reg, val);
1627 tcg_gen_sub_i64(t1, reg, val);
1628 tcg_gen_xor_i64(reg, reg, t0);
1629 tcg_gen_and_i64(t0, t0, reg);
1630
1631 /* Bound the result. */
/* Since val >= 0, overflow here can only be toward INT64_MIN. */
1632 tcg_gen_movi_i64(reg, INT64_MIN);
1633 t2 = tcg_const_i64(0);
1634 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1635 } else {
1636 /* Detect signed overflow for addition. */
1637 tcg_gen_xor_i64(t0, reg, val);
1638 tcg_gen_add_i64(reg, reg, val);
1639 tcg_gen_xor_i64(t1, reg, val);
1640 tcg_gen_andc_i64(t0, t1, t0);
1641
1642 /* Bound the result. */
/* Since val >= 0, overflow here can only be toward INT64_MAX. */
1643 tcg_gen_movi_i64(t1, INT64_MAX);
1644 t2 = tcg_const_i64(0);
1645 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1646 }
1647 tcg_temp_free_i64(t2);
1648 }
1649 tcg_temp_free_i64(t0);
1650 tcg_temp_free_i64(t1);
1651}
1652
1653/* Similarly with a vector and a scalar operand. */
/* Saturating add/sub of positive scalar VAL to each element of vector ZN,
 * writing ZD, via the out-of-line sqaddi/uqaddi helpers.  Subtraction is
 * implemented by negating VAL and using the signed-add helper where the
 * element width permits.
 */
1654static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1655 TCGv_i64 val, bool u, bool d)
1656{
1657 unsigned vsz = vec_full_reg_size(s);
1658 TCGv_ptr dptr, nptr;
1659 TCGv_i32 t32, desc;
1660 TCGv_i64 t64;
1661
1662 dptr = tcg_temp_new_ptr();
1663 nptr = tcg_temp_new_ptr();
1664 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1665 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1666 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1667
1668 switch (esz) {
1669 case MO_8:
1670 t32 = tcg_temp_new_i32();
1671 tcg_gen_extrl_i64_i32(t32, val);
1672 if (d) {
/* Subtraction becomes addition of the negated (32-bit) value. */
1673 tcg_gen_neg_i32(t32, t32);
1674 }
1675 if (u) {
1676 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1677 } else {
1678 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1679 }
1680 tcg_temp_free_i32(t32);
1681 break;
1682
1683 case MO_16:
1684 t32 = tcg_temp_new_i32();
1685 tcg_gen_extrl_i64_i32(t32, val);
1686 if (d) {
1687 tcg_gen_neg_i32(t32, t32);
1688 }
1689 if (u) {
1690 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1691 } else {
1692 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1693 }
1694 tcg_temp_free_i32(t32);
1695 break;
1696
1697 case MO_32:
1698 t64 = tcg_temp_new_i64();
1699 if (d) {
1700 tcg_gen_neg_i64(t64, val);
1701 } else {
1702 tcg_gen_mov_i64(t64, val);
1703 }
1704 if (u) {
1705 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1706 } else {
1707 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1708 }
1709 tcg_temp_free_i64(t64);
1710 break;
1711
1712 case MO_64:
/* At 64 bits, negating VAL could itself overflow for unsigned
 * subtraction, so a distinct uqsubi helper is used there. */
1713 if (u) {
1714 if (d) {
1715 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1716 } else {
1717 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1718 }
1719 } else if (d) {
1720 t64 = tcg_temp_new_i64();
1721 tcg_gen_neg_i64(t64, val);
1722 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1723 tcg_temp_free_i64(t64);
1724 } else {
1725 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1726 }
1727 break;
1728
1729 default:
1730 g_assert_not_reached();
1731 }
1732
1733 tcg_temp_free_ptr(dptr);
1734 tcg_temp_free_ptr(nptr);
1735 tcg_temp_free_i32(desc);
1736}
1737
/* CNTB/CNTH/CNTW/CNTD: write element-count * imm to a general register.
 * The count is a translate-time constant.
 */
1738static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1739{
1740 if (sve_access_check(s)) {
1741 unsigned fullsz = vec_full_reg_size(s);
1742 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1743 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1744 }
1745 return true;
1746}
1747
/* INCB/DECB etc: add or subtract element-count * imm to a general register
 * (non-saturating).
 */
1748static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1749{
1750 if (sve_access_check(s)) {
1751 unsigned fullsz = vec_full_reg_size(s);
1752 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753 int inc = numelem * a->imm * (a->d ? -1 : 1);
1754 TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756 tcg_gen_addi_i64(reg, reg, inc);
1757 }
1758 return true;
1759}
1760
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit form): saturating inc/dec of
 * the low 32 bits of a general register by element-count * imm, with the
 * result sign- or zero-extended to 64 bits.
 */
1761static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1762 uint32_t insn)
1763{
1764 if (!sve_access_check(s)) {
1765 return true;
1766 }
1767
1768 unsigned fullsz = vec_full_reg_size(s);
1769 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1770 int inc = numelem * a->imm;
1771 TCGv_i64 reg = cpu_reg(s, a->rd);
1772
1773 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1774 if (inc == 0) {
/* Increment of zero still performs the 32->64 extension. */
1775 if (a->u) {
1776 tcg_gen_ext32u_i64(reg, reg);
1777 } else {
1778 tcg_gen_ext32s_i64(reg, reg);
1779 }
1780 } else {
1781 TCGv_i64 t = tcg_const_i64(inc);
1782 do_sat_addsub_32(reg, t, a->u, a->d);
1783 tcg_temp_free_i64(t);
1784 }
1785 return true;
1786}
1787
/* Same as above, but operating on the full 64-bit register. */
1788static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1789 uint32_t insn)
1790{
1791 if (!sve_access_check(s)) {
1792 return true;
1793 }
1794
1795 unsigned fullsz = vec_full_reg_size(s);
1796 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1797 int inc = numelem * a->imm;
1798 TCGv_i64 reg = cpu_reg(s, a->rd);
1799
1800 if (inc != 0) {
1801 TCGv_i64 t = tcg_const_i64(inc);
1802 do_sat_addsub_64(reg, t, a->u, a->d);
1803 tcg_temp_free_i64(t);
1804 }
1805 return true;
1806}
1807
/* INC/DEC (vector): add or subtract element-count * imm to each element
 * of a vector, non-saturating.  Byte elements (esz == 0) are unallocated.
 */
1808static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1809{
1810 if (a->esz == 0) {
1811 return false;
1812 }
1813
1814 unsigned fullsz = vec_full_reg_size(s);
1815 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1816 int inc = numelem * a->imm;
1817
1818 if (inc != 0) {
1819 if (sve_access_check(s)) {
1820 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1821 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1822 vec_full_reg_offset(s, a->rn),
1823 t, fullsz, fullsz);
1824 tcg_temp_free_i64(t);
1825 }
1826 } else {
/* Zero increment degenerates to a register move.
 * NOTE(review): no sve_access_check on this path -- presumably
 * do_mov_z performs the check internally; confirm. */
1827 do_mov_z(s, a->rd, a->rn);
1828 }
1829 return true;
1830}
1831
/* SQINC/UQINC/SQDEC/UQDEC (vector): saturating variant of the above. */
1832static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1833 uint32_t insn)
1834{
1835 if (a->esz == 0) {
1836 return false;
1837 }
1838
1839 unsigned fullsz = vec_full_reg_size(s);
1840 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1841 int inc = numelem * a->imm;
1842
1843 if (inc != 0) {
1844 if (sve_access_check(s)) {
1845 TCGv_i64 t = tcg_const_i64(inc);
1846 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1847 tcg_temp_free_i64(t);
1848 }
1849 } else {
/* Zero increment: saturation cannot occur, plain move.
 * NOTE(review): same access-check question as trans_INCDEC_v. */
1850 do_mov_z(s, a->rd, a->rn);
1851 }
1852 return true;
1853}
1854
e1fa1164
RH
1855/*
1856 *** SVE Bitwise Immediate Group
1857 */
1858
/* Common expansion for bitwise-immediate ops: decode the 13-bit
 * AArch64 logical-immediate field DBM into a 64-bit mask, then apply
 * GVEC_FN with that immediate.  Returns false for invalid encodings.
 */
1859static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1860{
1861 uint64_t imm;
1862 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1863 extract32(a->dbm, 0, 6),
1864 extract32(a->dbm, 6, 6))) {
1865 return false;
1866 }
1867 if (sve_access_check(s)) {
1868 unsigned vsz = vec_full_reg_size(s);
1869 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1870 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1871 }
1872 return true;
1873}
1874
/* AND (vector, bitmask immediate). */
1875static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1876{
1877 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1878}
1879
/* ORR (vector, bitmask immediate). */
1880static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1881{
1882 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1883}
1884
/* EOR (vector, bitmask immediate). */
1885static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1886{
1887 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1888}
1889
/* DUPM: broadcast a decoded bitmask immediate to the whole vector. */
1890static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1891{
1892 uint64_t imm;
1893 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1894 extract32(a->dbm, 0, 6),
1895 extract32(a->dbm, 6, 6))) {
1896 return false;
1897 }
1898 if (sve_access_check(s)) {
1899 do_dupi_z(s, a->rd, imm);
1900 }
1901 return true;
1902}
1903
f25a2361
RH
1904/*
1905 *** SVE Integer Wide Immediate - Predicated Group
1906 */
1907
1908/* Implement all merging copies. This is used for CPY (immediate),
1909 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1910 */
/* Writes VAL to the active elements of ZD, copying ZN for the inactive
 * ones, via the per-element-size cpy_m helpers.
 */
1911static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1912 TCGv_i64 val)
1913{
1914 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1915 static gen_cpy * const fns[4] = {
1916 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1917 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1918 };
1919 unsigned vsz = vec_full_reg_size(s);
1920 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1921 TCGv_ptr t_zd = tcg_temp_new_ptr();
1922 TCGv_ptr t_zn = tcg_temp_new_ptr();
1923 TCGv_ptr t_pg = tcg_temp_new_ptr();
1924
1925 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1926 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1927 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1928
1929 fns[esz](t_zd, t_zn, t_pg, val, desc);
1930
1931 tcg_temp_free_ptr(t_zd);
1932 tcg_temp_free_ptr(t_zn);
1933 tcg_temp_free_ptr(t_pg);
1934 tcg_temp_free_i32(desc);
1935}
1936
/* FCPY: merging copy of an expanded 8-bit FP immediate.
 * esz == 0 (byte) is an invalid encoding for FP data.
 */
1937static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1938{
1939 if (a->esz == 0) {
1940 return false;
1941 }
1942 if (sve_access_check(s)) {
1943 /* Decode the VFP immediate. */
1944 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1945 TCGv_i64 t_imm = tcg_const_i64(imm);
1946 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1947 tcg_temp_free_i64(t_imm);
1948 }
1949 return true;
1950}
1951
/* CPY (immediate, merging).
 * NOTE(review): insn bit 13 appears to be the shift field; with byte
 * elements a shifted immediate is unallocated -- confirm against the
 * encoding diagram.
 */
1952static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1953{
1954 if (a->esz == 0 && extract32(insn, 13, 1)) {
1955 return false;
1956 }
1957 if (sve_access_check(s)) {
1958 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1959 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1960 tcg_temp_free_i64(t_imm);
1961 }
1962 return true;
1963}
1964
/* CPY (immediate, zeroing): inactive elements are cleared rather than
 * merged, handled by the cpy_z out-of-line helpers.
 */
1965static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1966{
1967 static gen_helper_gvec_2i * const fns[4] = {
1968 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1969 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1970 };
1971
1972 if (a->esz == 0 && extract32(insn, 13, 1)) {
1973 return false;
1974 }
1975 if (sve_access_check(s)) {
1976 unsigned vsz = vec_full_reg_size(s);
1977 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1978 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1979 pred_full_reg_offset(s, a->pg),
1980 t_imm, vsz, vsz, 0, fns[a->esz]);
1981 tcg_temp_free_i64(t_imm);
1982 }
1983 return true;
1984}
1985
b94f8f60
RH
1986/*
1987 *** SVE Permute Extract Group
1988 */
1989
/* EXT: concatenate ZN:ZM and extract VSZ bytes starting at byte IMM.
 * An out-of-range IMM selects offset 0 (i.e. a copy of ZN).
 */
1990static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1991{
1992 if (!sve_access_check(s)) {
1993 return true;
1994 }
1995
1996 unsigned vsz = vec_full_reg_size(s);
1997 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1998 unsigned n_siz = vsz - n_ofs;
1999 unsigned d = vec_full_reg_offset(s, a->rd);
2000 unsigned n = vec_full_reg_offset(s, a->rn);
2001 unsigned m = vec_full_reg_offset(s, a->rm);
2002
2003 /* Use host vector move insns if we have appropriate sizes
2004 * and no unfortunate overlap.
2005 */
2006 if (m != d
2007 && n_ofs == size_for_gvec(n_ofs)
2008 && n_siz == size_for_gvec(n_siz)
2009 && (d != n || n_siz <= n_ofs)) {
/* Tail of ZN first, then head of ZM into the upper part of ZD. */
2010 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2011 if (n_ofs != 0) {
2012 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2013 }
2014 } else {
/* General case: out-of-line helper with n_ofs in the desc data. */
2015 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2016 }
2017 return true;
2018}
2019
30562ab7
RH
2020/*
2021 *** SVE Permute - Unpredicated Group
2022 */
2023
/* DUP (scalar): broadcast a general register to every vector element. */
2024static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2025{
2026 if (sve_access_check(s)) {
2027 unsigned vsz = vec_full_reg_size(s);
2028 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2029 vsz, vsz, cpu_reg_sp(s, a->rn));
2030 }
2031 return true;
2032}
2033
/* DUP (indexed): broadcast vector element ZN[index]; the element size and
 * index are packed into IMM (size = position of the lowest set bit).
 * IMM with no size bits set is an invalid encoding.
 */
2034static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2035{
2036 if ((a->imm & 0x1f) == 0) {
2037 return false;
2038 }
2039 if (sve_access_check(s)) {
2040 unsigned vsz = vec_full_reg_size(s);
2041 unsigned dofs = vec_full_reg_offset(s, a->rd);
2042 unsigned esz, index;
2043
2044 esz = ctz32(a->imm);
2045 index = a->imm >> (esz + 1);
2046
2047 if ((index << esz) < vsz) {
2048 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2049 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2050 } else {
/* Index beyond the vector length broadcasts zero. */
2051 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2052 }
2053 }
2054 return true;
2055}
2056
/* INSR common expansion: shift ZN up one element and insert VAL at
 * element 0, via the per-element-size insr helpers.
 */
2057static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058{
2059 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060 static gen_insr * const fns[4] = {
2061 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063 };
2064 unsigned vsz = vec_full_reg_size(s);
2065 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066 TCGv_ptr t_zd = tcg_temp_new_ptr();
2067 TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072 fns[a->esz](t_zd, t_zn, val, desc);
2073
2074 tcg_temp_free_ptr(t_zd);
2075 tcg_temp_free_ptr(t_zn);
2076 tcg_temp_free_i32(desc);
2077}
2078
/* INSR (SIMD&FP scalar): insert the low 64 bits of an FP register. */
2079static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2080{
2081 if (sve_access_check(s)) {
2082 TCGv_i64 t = tcg_temp_new_i64();
2083 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084 do_insr_i64(s, a, t);
2085 tcg_temp_free_i64(t);
2086 }
2087 return true;
2088}
2089
/* INSR (general register): insert a general register value. */
2090static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2091{
2092 if (sve_access_check(s)) {
2093 do_insr_i64(s, a, cpu_reg(s, a->rm));
2094 }
2095 return true;
2096}
2097
/* REV (vector): reverse the order of elements within the vector. */
2098static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2099{
2100 static gen_helper_gvec_2 * const fns[4] = {
2101 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103 };
2104
2105 if (sve_access_check(s)) {
2106 unsigned vsz = vec_full_reg_size(s);
2107 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108 vec_full_reg_offset(s, a->rn),
2109 vsz, vsz, 0, fns[a->esz]);
2110 }
2111 return true;
2112}
2113
/* TBL: table lookup, indexing elements of ZN by the values in ZM. */
2114static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2115{
2116 static gen_helper_gvec_3 * const fns[4] = {
2117 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119 };
2120
2121 if (sve_access_check(s)) {
2122 unsigned vsz = vec_full_reg_size(s);
2123 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124 vec_full_reg_offset(s, a->rn),
2125 vec_full_reg_offset(s, a->rm),
2126 vsz, vsz, 0, fns[a->esz]);
2127 }
2128 return true;
2129}
2130
/* SUNPKLO/HI, UUNPKLO/HI: widen the low or high half of ZN to the next
 * element size, signed or unsigned.  ESZ here is the *destination* size,
 * so byte destinations are invalid.
 */
2131static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2132{
2133 static gen_helper_gvec_2 * const fns[4][2] = {
2134 { NULL, NULL },
2135 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138 };
2139
2140 if (a->esz == 0) {
2141 return false;
2142 }
2143 if (sve_access_check(s)) {
2144 unsigned vsz = vec_full_reg_size(s);
2145 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
/* The H form reads the upper half of the source register. */
2146 vec_full_reg_offset(s, a->rn)
2147 + (a->h ? vsz / 2 : 0),
2148 vsz, vsz, 0, fns[a->esz][a->u]);
2149 }
2150 return true;
2151}
2152
d731d8cb
RH
2153/*
2154 *** SVE Permute - Predicates Group
2155 */
2156
/* Common expansion for three-operand predicate permutes (ZIP/UZP/TRN),
 * with HIGH_ODD selecting the 2nd-half/odd variant.
 */
2157static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158 gen_helper_gvec_3 *fn)
2159{
2160 if (!sve_access_check(s)) {
2161 return true;
2162 }
2163
2164 unsigned vsz = pred_full_reg_size(s);
2165
2166 /* Predicate sizes may be smaller and cannot use simd_desc.
2167 We cannot round up, as we do elsewhere, because we need
2168 the exact size for ZIP2 and REV. We retain the style for
2169 the other helpers for consistency. */
2170 TCGv_ptr t_d = tcg_temp_new_ptr();
2171 TCGv_ptr t_n = tcg_temp_new_ptr();
2172 TCGv_ptr t_m = tcg_temp_new_ptr();
2173 TCGv_i32 t_desc;
2174 int desc;
2175
/* Hand-built descriptor: size-2 in the low bits (simd_desc style),
 * element size and the high/odd flag in the data field. */
2176 desc = vsz - 2;
2177 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183 t_desc = tcg_const_i32(desc);
2184
2185 fn(t_d, t_n, t_m, t_desc);
2186
2187 tcg_temp_free_ptr(t_d);
2188 tcg_temp_free_ptr(t_n);
2189 tcg_temp_free_ptr(t_m);
2190 tcg_temp_free_i32(t_desc);
2191 return true;
2192}
2193
/* Two-operand variant of the above, for REV and PUNPKLO/HI. */
2194static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195 gen_helper_gvec_2 *fn)
2196{
2197 if (!sve_access_check(s)) {
2198 return true;
2199 }
2200
2201 unsigned vsz = pred_full_reg_size(s);
2202 TCGv_ptr t_d = tcg_temp_new_ptr();
2203 TCGv_ptr t_n = tcg_temp_new_ptr();
2204 TCGv_i32 t_desc;
2205 int desc;
2206
2207 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210 /* Predicate sizes may be smaller and cannot use simd_desc.
2211 We cannot round up, as we do elsewhere, because we need
2212 the exact size for ZIP2 and REV. We retain the style for
2213 the other helpers for consistency. */
2214
2215 desc = vsz - 2;
2216 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218 t_desc = tcg_const_i32(desc);
2219
2220 fn(t_d, t_n, t_desc);
2221
2222 tcg_temp_free_i32(t_desc);
2223 tcg_temp_free_ptr(t_d);
2224 tcg_temp_free_ptr(t_n);
2225 return true;
2226}
2227
/* ZIP1 (predicates): interleave low halves of PN and PM. */
2228static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229{
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231}
2232
/* ZIP2 (predicates): interleave high halves of PN and PM. */
2233static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234{
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236}
2237
/* UZP1 (predicates): concatenate even-numbered elements. */
2238static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2239{
2240 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241}
2242
/* UZP2 (predicates): concatenate odd-numbered elements. */
2243static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2244{
2245 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246}
2247
/* TRN1 (predicates): transpose even element pairs. */
2248static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2249{
2250 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251}
2252
/* TRN2 (predicates): transpose odd element pairs. */
2253static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2254{
2255 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256}
2257
/* REV (predicate): reverse the element order of PN. */
2258static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2259{
2260 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261}
2262
/* PUNPKLO: widen the low half of a predicate. */
2263static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2264{
2265 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266}
2267
/* PUNPKHI: widen the high half of a predicate. */
2268static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2269{
2270 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271}
2272
234b48e9
RH
2273/*
2274 *** SVE Permute - Interleaving Group
2275 */
2276
2277static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278{
2279 static gen_helper_gvec_3 * const fns[4] = {
2280 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282 };
2283
2284 if (sve_access_check(s)) {
2285 unsigned vsz = vec_full_reg_size(s);
2286 unsigned high_ofs = high ? vsz / 2 : 0;
2287 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288 vec_full_reg_offset(s, a->rn) + high_ofs,
2289 vec_full_reg_offset(s, a->rm) + high_ofs,
2290 vsz, vsz, 0, fns[a->esz]);
2291 }
2292 return true;
2293}
2294
2295static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296 gen_helper_gvec_3 *fn)
2297{
2298 if (sve_access_check(s)) {
2299 unsigned vsz = vec_full_reg_size(s);
2300 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301 vec_full_reg_offset(s, a->rn),
2302 vec_full_reg_offset(s, a->rm),
2303 vsz, vsz, data, fn);
2304 }
2305 return true;
2306}
2307
2308static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309{
2310 return do_zip(s, a, false);
2311}
2312
2313static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2314{
2315 return do_zip(s, a, true);
2316}
2317
2318static gen_helper_gvec_3 * const uzp_fns[4] = {
2319 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321};
2322
2323static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324{
2325 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326}
2327
2328static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2329{
2330 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331}
2332
2333static gen_helper_gvec_3 * const trn_fns[4] = {
2334 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336};
2337
2338static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2339{
2340 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341}
2342
2343static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2344{
2345 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346}
2347
3ca879ae
RH
2348/*
2349 *** SVE Permute Vector - Predicated Group
2350 */
2351
2352static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2353{
2354 static gen_helper_gvec_3 * const fns[4] = {
2355 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356 };
2357 return do_zpz_ool(s, a, fns[a->esz]);
2358}
2359
ef23cb72
RH
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    /* Pack the exact predicate byte size (minus 2) and element size.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2385
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Wrap with a simple mask when the vector size allows it.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 when the incremented offset reaches vsz.  */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2404
/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* The not-found value (-(1 << esz)) masks to the last element.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise substitute the final element offset when LAST < 0.  */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2420
2421/* Load an unsigned element of ESZ from BASE+OFS. */
2422static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423{
2424 TCGv_i64 r = tcg_temp_new_i64();
2425
2426 switch (esz) {
2427 case 0:
2428 tcg_gen_ld8u_i64(r, base, ofs);
2429 break;
2430 case 1:
2431 tcg_gen_ld16u_i64(r, base, ofs);
2432 break;
2433 case 2:
2434 tcg_gen_ld32u_i64(r, base, ofs);
2435 break;
2436 case 3:
2437 tcg_gen_ld_i64(r, base, ofs);
2438 break;
2439 default:
2440 g_assert_not_reached();
2441 }
2442 return r;
2443}
2444
/* Load an unsigned element of ESZ from RM[LAST].
 * Note: on big-endian hosts LAST is modified in place (byte-order
 * adjustment); callers free it afterwards and must not reuse its value.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering. See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2470
/* Compute CLAST for a Zreg: splat the element after (CLASTA) or at
 * (CLASTB) the last active element across Zd; if no element is active,
 * Zd takes the value of Zn.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* LAST must survive the conditional branch below, so use a local temp. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2518
2519static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2520{
2521 return do_clast_vector(s, a, false);
2522}
2523
2524static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2525{
2526 return do_clast_vector(s, a, true);
2527}
2528
/* Compute CLAST for a scalar: if any element is active, replace REG_VAL
 * with the selected element, otherwise leave REG_VAL unchanged.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep the loaded element only if an active element was found.  */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2561
2562/* Compute CLAST for a Vreg. */
2563static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564{
2565 if (sve_access_check(s)) {
2566 int esz = a->esz;
2567 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571 write_fp_dreg(s, a->rd, reg);
2572 tcg_temp_free_i64(reg);
2573 }
2574 return true;
2575}
2576
2577static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578{
2579 return do_clast_fp(s, a, false);
2580}
2581
2582static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2583{
2584 return do_clast_fp(s, a, true);
2585}
2586
/* Compute CLAST for a Xreg: Xd supplies the fallback value.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Zero-extend the register to the element size first, so that the
     * "no active element" path of do_clast_scalar keeps the extended value.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2616
2617static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618{
2619 return do_clast_general(s, a, false);
2620}
2621
2622static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2623{
2624 return do_clast_general(s, a, true);
2625}
2626
2627/* Compute LAST for a scalar. */
2628static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629 int pg, int rm, bool before)
2630{
2631 TCGv_i32 last = tcg_temp_new_i32();
2632 TCGv_i64 ret;
2633
2634 find_last_active(s, last, esz, pg);
2635 if (before) {
2636 wrap_last_active(s, last, esz);
2637 } else {
2638 incr_last_active(s, last, esz);
2639 }
2640
2641 ret = load_last_active(s, last, rm, esz);
2642 tcg_temp_free_i32(last);
2643 return ret;
2644}
2645
2646/* Compute LAST for a Vreg. */
2647static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648{
2649 if (sve_access_check(s)) {
2650 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651 write_fp_dreg(s, a->rd, val);
2652 tcg_temp_free_i64(val);
2653 }
2654 return true;
2655}
2656
2657static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2658{
2659 return do_last_fp(s, a, false);
2660}
2661
2662static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663{
2664 return do_last_fp(s, a, true);
2665}
2666
2667/* Compute LAST for a Xreg. */
2668static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669{
2670 if (sve_access_check(s)) {
2671 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673 tcg_temp_free_i64(val);
2674 }
2675 return true;
2676}
2677
2678static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679{
2680 return do_last_general(s, a, false);
2681}
2682
2683static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2684{
2685 return do_last_general(s, a, true);
2686}
2687
792a5578
RH
2688static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2689{
2690 if (sve_access_check(s)) {
2691 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692 }
2693 return true;
2694}
2695
2696static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2697{
2698 if (sve_access_check(s)) {
2699 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702 tcg_temp_free_i64(t);
2703 }
2704 return true;
2705}
2706
dae8fb90
RH
2707static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2708{
2709 static gen_helper_gvec_3 * const fns[4] = {
2710 NULL,
2711 gen_helper_sve_revb_h,
2712 gen_helper_sve_revb_s,
2713 gen_helper_sve_revb_d,
2714 };
2715 return do_zpz_ool(s, a, fns[a->esz]);
2716}
2717
2718static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2719{
2720 static gen_helper_gvec_3 * const fns[4] = {
2721 NULL,
2722 NULL,
2723 gen_helper_sve_revh_s,
2724 gen_helper_sve_revh_d,
2725 };
2726 return do_zpz_ool(s, a, fns[a->esz]);
2727}
2728
2729static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2730{
2731 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732}
2733
2734static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2735{
2736 static gen_helper_gvec_3 * const fns[4] = {
2737 gen_helper_sve_rbit_b,
2738 gen_helper_sve_rbit_h,
2739 gen_helper_sve_rbit_s,
2740 gen_helper_sve_rbit_d,
2741 };
2742 return do_zpz_ool(s, a, fns[a->esz]);
2743}
2744
b48ff240
RH
2745static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2746{
2747 if (sve_access_check(s)) {
2748 unsigned vsz = vec_full_reg_size(s);
2749 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750 vec_full_reg_offset(s, a->rn),
2751 vec_full_reg_offset(s, a->rm),
2752 pred_full_reg_offset(s, a->pg),
2753 vsz, vsz, a->esz, gen_helper_sve_splice);
2754 }
2755 return true;
2756}
2757
757f9cff
RH
2758/*
2759 *** SVE Integer Compare - Vectors Group
2760 */
2761
/* Expand a predicated comparison of two vectors via an out-of-line
 * helper and update NZCV from the flags value it returns.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    /* NULL marks an element size with no helper for this insn.  */
    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* T serves double duty: descriptor on input, flags on output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2800
/* Expand trans_<NAME>_ppzz for a same-element-size vector comparison,
 * dispatching on element size to the four out-of-line helpers.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
2820
/* Expand trans_<NAME>_ppzw for a wide comparison, where each element of
 * Zn is compared against a 64-bit element of Zm.  There is no doubleword
 * form (the _d slot is NULL), and CMPLT/CMPLE/CMPLO/CMPLS exist only in
 * this wide form.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2844
38cadeba
RH
2845/*
2846 *** SVE Integer Compare - Immediate Groups
2847 */
2848
/* Expand a predicated comparison of a vector against an immediate via an
 * out-of-line helper (the immediate rides in the descriptor data field),
 * and update NZCV from the flags value the helper returns.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    /* NULL marks an element size with no helper for this insn.  */
    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* T serves double duty: descriptor on input, flags on output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2884
/* Expand trans_<NAME>_ppzi for a compare-against-immediate, dispatching
 * on element size to the four out-of-line helpers.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2908
35da316f
RH
2909/*
2910 *** SVE Partition Break Group
2911 */
2912
/* Expand a 3-operand BRK insn (BRKPA/BRKPB); A->S selects the
 * flag-setting variant, which also updates NZCV.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    /* Descriptor: exact predicate byte size, minus 2 as elsewhere.  */
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* T is reused as the flags output of the helper.  */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2947
/* Expand a 2-operand BRK insn (BRKA/BRKB/BRKN); A->S selects the
 * flag-setting variant, which also updates NZCV.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    /* Descriptor: exact predicate byte size, minus 2 as elsewhere.  */
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* T is reused as the flags output of the helper.  */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2979
2980static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2981{
2982 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2983}
2984
2985static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2986{
2987 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2988}
2989
2990static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2991{
2992 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2993}
2994
2995static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2996{
2997 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2998}
2999
3000static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3001{
3002 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3003}
3004
3005static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3006{
3007 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3008}
3009
3010static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3011{
3012 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3013}
3014
9ee3a611
RH
3015/*
3016 *** SVE Predicate Count Group
3017 */
3018
/* Count the active elements of size ESZ in PN governed by PG,
 * depositing the count into VAL.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: AND the two predicates,
         * mask to the element positions, and popcount inline.
         */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates go through the out-of-line helper.  */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        /* Exact predicate byte size (minus 2) plus element size.  */
        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3060
3061static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3062{
3063 if (sve_access_check(s)) {
3064 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3065 }
3066 return true;
3067}
3068
3069static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3070 uint32_t insn)
3071{
3072 if (sve_access_check(s)) {
3073 TCGv_i64 reg = cpu_reg(s, a->rd);
3074 TCGv_i64 val = tcg_temp_new_i64();
3075
3076 do_cntp(s, val, a->esz, a->pg, a->pg);
3077 if (a->d) {
3078 tcg_gen_sub_i64(reg, reg, val);
3079 } else {
3080 tcg_gen_add_i64(reg, reg, val);
3081 }
3082 tcg_temp_free_i64(val);
3083 }
3084 return true;
3085}
3086
3087static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3088 uint32_t insn)
3089{
3090 if (a->esz == 0) {
3091 return false;
3092 }
3093 if (sve_access_check(s)) {
3094 unsigned vsz = vec_full_reg_size(s);
3095 TCGv_i64 val = tcg_temp_new_i64();
3096 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3097
3098 do_cntp(s, val, a->esz, a->pg, a->pg);
3099 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3100 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3101 }
3102 return true;
3103}
3104
3105static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3106 uint32_t insn)
3107{
3108 if (sve_access_check(s)) {
3109 TCGv_i64 reg = cpu_reg(s, a->rd);
3110 TCGv_i64 val = tcg_temp_new_i64();
3111
3112 do_cntp(s, val, a->esz, a->pg, a->pg);
3113 do_sat_addsub_32(reg, val, a->u, a->d);
3114 }
3115 return true;
3116}
3117
3118static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3119 uint32_t insn)
3120{
3121 if (sve_access_check(s)) {
3122 TCGv_i64 reg = cpu_reg(s, a->rd);
3123 TCGv_i64 val = tcg_temp_new_i64();
3124
3125 do_cntp(s, val, a->esz, a->pg, a->pg);
3126 do_sat_addsub_64(reg, val, a->u, a->d);
3127 }
3128 return true;
3129}
3130
3131static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3132 uint32_t insn)
3133{
3134 if (a->esz == 0) {
3135 return false;
3136 }
3137 if (sve_access_check(s)) {
3138 TCGv_i64 val = tcg_temp_new_i64();
3139 do_cntp(s, val, a->esz, a->pg, a->pg);
3140 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3141 }
3142 return true;
3143}
3144
caf1cefc
RH
3145/*
3146 *** SVE Integer Compare Scalars Group
3147 */
3148
/* CTERMEQ/CTERMNE: compare and terminate loop.
 * Sets N to the comparison result and V to !N & !C; C and Z are
 * left unchanged.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF holds the 0/1 comparison result for now.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3173
/* WHILELT/WHILELE/WHILELO/WHILELS: set Pd to a predicate describing the
 * iterations for which the scalar loop condition holds, and set NZCV.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    /* For 32-bit forms, extend the operands per signedness first.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
     * 2**64 iterations, overflowing to 0. Of course, predicate registers
     * aren't that large, so any value >= predicate size is sufficient.
     */
    tcg_gen_sub_i64(t0, op1, op0);

    /* t0 = MIN(op1 - op0, vsz). */
    tcg_gen_movi_i64(t1, vsz);
    tcg_gen_umin_i64(t0, t0, t1);
    if (a->eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);
    }

    /* t0 = (condition true ? t0 : 0). */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    /* Descriptor: exact predicate byte size (vsz / 8) minus 2, plus esz.  */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* T2 carries the iteration count in and the flags result out.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3243
ed491961
RH
3244/*
3245 *** SVE Integer Wide Immediate - Unpredicated Group
3246 */
3247
3248static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3249{
3250 if (a->esz == 0) {
3251 return false;
3252 }
3253 if (sve_access_check(s)) {
3254 unsigned vsz = vec_full_reg_size(s);
3255 int dofs = vec_full_reg_offset(s, a->rd);
3256 uint64_t imm;
3257
3258 /* Decode the VFP immediate. */
3259 imm = vfp_expand_imm(a->esz, a->imm);
3260 imm = dup_const(a->esz, imm);
3261
3262 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3263 }
3264 return true;
3265}
3266
3267static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3268{
3269 if (a->esz == 0 && extract32(insn, 13, 1)) {
3270 return false;
3271 }
3272 if (sve_access_check(s)) {
3273 unsigned vsz = vec_full_reg_size(s);
3274 int dofs = vec_full_reg_offset(s, a->rd);
3275
3276 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3277 }
3278 return true;
3279}
3280
6e6a157d
RH
3281static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3282{
3283 if (a->esz == 0 && extract32(insn, 13, 1)) {
3284 return false;
3285 }
3286 if (sve_access_check(s)) {
3287 unsigned vsz = vec_full_reg_size(s);
3288 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3289 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3290 }
3291 return true;
3292}
3293
/* SUB (immediate) is ADD of the negated immediate; A is decode-local,
 * so the in-place update is not observed elsewhere.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a, insn);
}
3299
/* SUBR (immediate): reversed subtract, Zd = imm - Zn, element-wise.
 * scalar_first places the immediate as the first operand of the
 * subtraction in the gvec expansion.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* The byte variant with insn bit 13 set is an invalid encoding.  */
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3343
3344static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345{
3346 if (sve_access_check(s)) {
3347 unsigned vsz = vec_full_reg_size(s);
3348 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3349 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3350 }
3351 return true;
3352}
3353
3354static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3355 bool u, bool d)
3356{
3357 if (a->esz == 0 && extract32(insn, 13, 1)) {
3358 return false;
3359 }
3360 if (sve_access_check(s)) {
3361 TCGv_i64 val = tcg_const_i64(a->imm);
3362 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3363 tcg_temp_free_i64(val);
3364 }
3365 return true;
3366}
3367
/* SQADD (immediate): signed saturating add.  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, false);
}

/* UQADD (immediate): unsigned saturating add.  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, false);
}

/* SQSUB (immediate): signed saturating subtract.  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, true);
}

/* UQSUB (immediate): unsigned saturating subtract.  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, true);
}
3387
3388static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3389{
3390 if (sve_access_check(s)) {
3391 unsigned vsz = vec_full_reg_size(s);
3392 TCGv_i64 c = tcg_const_i64(a->imm);
3393
3394 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3395 vec_full_reg_offset(s, a->rn),
3396 c, vsz, vsz, 0, fn);
3397 tcg_temp_free_i64(c);
3398 }
3399 return true;
3400}
3401
/* Expand SMAX/UMAX/SMIN/UMIN (immediate) via out-of-line helpers,
 * one per element size (byte/half/single/double).
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
                               uint32_t insn) \
{ \
    static gen_helper_gvec_2i * const fns[4] = { \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
    }; \
    return do_zzi_ool(s, a, fns[a->esz]); \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3419
d730ecaa
RH
3420static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3421{
3422 static gen_helper_gvec_3 * const fns[2][2] = {
3423 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3424 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3425 };
3426
3427 if (sve_access_check(s)) {
3428 unsigned vsz = vec_full_reg_size(s);
3429 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3430 vec_full_reg_offset(s, a->rn),
3431 vec_full_reg_offset(s, a->rm),
3432 vsz, vsz, 0, fns[a->u][a->sz]);
3433 }
3434 return true;
3435}
3436
16fcfdc7
RH
3437static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3438{
3439 static gen_helper_gvec_3 * const fns[2][2] = {
3440 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3441 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3442 };
3443
3444 if (sve_access_check(s)) {
3445 unsigned vsz = vec_full_reg_size(s);
3446 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3447 vec_full_reg_offset(s, a->rn),
3448 vec_full_reg_offset(s, a->rm),
3449 vsz, vsz, a->index, fns[a->u][a->sz]);
3450 }
3451 return true;
3452}
3453
3454
ca40a6e6
RH
3455/*
3456 *** SVE Floating Point Multiply-Add Indexed Group
3457 */
3458
/* FMLA/FMLS (indexed): fused multiply-add with one operand taken from
 * an indexed element of Zm.  The subtract flag and index are packed
 * into the descriptor data as (index << 1) | sub for the helper.
 * Byte elements are not decoded (fns is indexed by esz - 1).
 */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3480
3481/*
3482 *** SVE Floating Point Multiply Indexed Group
3483 */
3484
3485static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3486{
3487 static gen_helper_gvec_3_ptr * const fns[3] = {
3488 gen_helper_gvec_fmul_idx_h,
3489 gen_helper_gvec_fmul_idx_s,
3490 gen_helper_gvec_fmul_idx_d,
3491 };
3492
3493 if (sve_access_check(s)) {
3494 unsigned vsz = vec_full_reg_size(s);
3495 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3496 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3497 vec_full_reg_offset(s, a->rn),
3498 vec_full_reg_offset(s, a->rm),
3499 status, vsz, vsz, a->index, fns[a->esz - 1]);
3500 tcg_temp_free_ptr(status);
3501 }
3502 return true;
3503}
3504
23fbe79f
RH
3505/*
3506 *** SVE Floating Point Fast Reduction Group
3507 */
3508
3509typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3510 TCGv_ptr, TCGv_i32);
3511
/* Expand a horizontal FP reduction: fold the active elements of Zn
 * (under Pg) into a single scalar via the out-of-line helper FN,
 * then store the scalar to Vd.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* The descriptor's maxsz field carries pow2ceil(vsz) rather than
     * vsz; presumably the helper uses this power-of-2 bound for its
     * reduction -- confirm against the helper implementation.
     */
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3538
/* Expand the FP horizontal reductions (FADDV, FMINNMV, FMAXNMV,
 * FMINV, FMAXV) through do_reduce.  Byte elements (esz == 0) are
 * unallocated; fns is indexed by esz - 1 (half/single/double).
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{ \
    static gen_helper_fp_reduce * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_reduce(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3561
3887c038
RH
3562/*
3563 *** SVE Floating Point Unary Operations - Unpredicated Group
3564 */
3565
3566static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3567{
3568 unsigned vsz = vec_full_reg_size(s);
3569 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3570
3571 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3572 vec_full_reg_offset(s, a->rn),
3573 status, vsz, vsz, 0, fn);
3574 tcg_temp_free_ptr(status);
3575}
3576
3577static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3578{
3579 static gen_helper_gvec_2_ptr * const fns[3] = {
3580 gen_helper_gvec_frecpe_h,
3581 gen_helper_gvec_frecpe_s,
3582 gen_helper_gvec_frecpe_d,
3583 };
3584 if (a->esz == 0) {
3585 return false;
3586 }
3587 if (sve_access_check(s)) {
3588 do_zz_fp(s, a, fns[a->esz - 1]);
3589 }
3590 return true;
3591}
3592
3593static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3594{
3595 static gen_helper_gvec_2_ptr * const fns[3] = {
3596 gen_helper_gvec_frsqrte_h,
3597 gen_helper_gvec_frsqrte_s,
3598 gen_helper_gvec_frsqrte_d,
3599 };
3600 if (a->esz == 0) {
3601 return false;
3602 }
3603 if (sve_access_check(s)) {
3604 do_zz_fp(s, a, fns[a->esz - 1]);
3605 }
3606 return true;
3607}
3608
4d2e2a03
RH
3609/*
3610 *** SVE Floating Point Compare with Zero Group
3611 */
3612
3613static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3614 gen_helper_gvec_3_ptr *fn)
3615{
3616 unsigned vsz = vec_full_reg_size(s);
3617 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3618
3619 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3620 vec_full_reg_offset(s, a->rn),
3621 pred_full_reg_offset(s, a->pg),
3622 status, vsz, vsz, 0, fn);
3623 tcg_temp_free_ptr(status);
3624}
3625
/* Expand the FP compare-with-zero instructions, producing a predicate
 * result.  Byte elements are unallocated; fns is indexed by esz - 1.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_3_ptr * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_ppz_fp(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3651
67fcd9ad
RH
3652/*
3653 *** SVE floating-point trig multiply-add coefficient
3654 */
3655
3656static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3657{
3658 static gen_helper_gvec_3_ptr * const fns[3] = {
3659 gen_helper_sve_ftmad_h,
3660 gen_helper_sve_ftmad_s,
3661 gen_helper_sve_ftmad_d,
3662 };
3663
3664 if (a->esz == 0) {
3665 return false;
3666 }
3667 if (sve_access_check(s)) {
3668 unsigned vsz = vec_full_reg_size(s);
3669 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3670 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3671 vec_full_reg_offset(s, a->rn),
3672 vec_full_reg_offset(s, a->rm),
3673 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3674 tcg_temp_free_ptr(status);
3675 }
3676 return true;
3677}
3678
7f9ddf64
RH
3679/*
3680 *** SVE Floating Point Accumulating Reduction Group
3681 */
3682
/* FADDA: floating-point accumulate across the elements of Zm under Pg,
 * starting from the scalar in element 0 of Zn; the scalar result is
 * written to Vd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* Byte elements are unallocated.  */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of Zn.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* t_val is both the input accumulator and the result.  */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3723
29b80469
RH
3724/*
3725 *** SVE Floating Point Arithmetic - Unpredicated Group
3726 */
3727
3728static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3729 gen_helper_gvec_3_ptr *fn)
3730{
3731 if (fn == NULL) {
3732 return false;
3733 }
3734 if (sve_access_check(s)) {
3735 unsigned vsz = vec_full_reg_size(s);
3736 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3737 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3738 vec_full_reg_offset(s, a->rn),
3739 vec_full_reg_offset(s, a->rm),
3740 status, vsz, vsz, 0, fn);
3741 tcg_temp_free_ptr(status);
3742 }
3743 return true;
3744}
3745
3746
/* Expand the unpredicated FP binary ops.  The NULL entry for esz == 0
 * (byte) is rejected inside do_zzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_3_ptr * const fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    return do_zzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3765
ec3b87c2
RH
3766/*
3767 *** SVE Floating Point Arithmetic - Predicated Group
3768 */
3769
3770static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3771 gen_helper_gvec_4_ptr *fn)
3772{
3773 if (fn == NULL) {
3774 return false;
3775 }
3776 if (sve_access_check(s)) {
3777 unsigned vsz = vec_full_reg_size(s);
3778 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3779 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3780 vec_full_reg_offset(s, a->rn),
3781 vec_full_reg_offset(s, a->rm),
3782 pred_full_reg_offset(s, a->pg),
3783 status, vsz, vsz, 0, fn);
3784 tcg_temp_free_ptr(status);
3785 }
3786 return true;
3787}
3788
/* Expand the predicated FP binary ops.  The NULL entry for esz == 0
 * (byte) is rejected inside do_zpzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_zpzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 3812
cc48affe
RH
3813typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3814 TCGv_i64, TCGv_ptr, TCGv_i32);
3815
/* Invoke an out-of-line helper on (Zd, Zn, Pg, SCALAR), selecting the
 * FP16 fpstatus when IS_FP16.  The caller owns (and frees) SCALAR.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3840
3841static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3842 gen_helper_sve_fp2scalar *fn)
3843{
3844 TCGv_i64 temp = tcg_const_i64(imm);
3845 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3846 tcg_temp_free_i64(temp);
3847}
3848
/* Expand an FP arithmetic-with-immediate op.  The one-bit immediate
 * a->imm selects between the two permitted constants for the op:
 * const0 when 0, const1 when 1, in the element's FP format.
 * Byte elements are unallocated.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
                                uint32_t insn) \
{ \
    static gen_helper_sve_fp2scalar * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d \
    }; \
    static uint64_t const val[3][2] = { \
        { float16_##const0, float16_##const1 }, \
        { float32_##const0, float32_##const1 }, \
        { float64_##const0, float64_##const1 }, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
    } \
    return true; \
}

/* Encodings of 2.0 in each format, for FMUL's constant pair.  */
#define float16_two make_float16(0x4000)
#define float32_two make_float32(0x40000000)
#define float64_two make_float64(0x4000000000000000ULL)

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3886
abfdefd5
RH
3887static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888 gen_helper_gvec_4_ptr *fn)
3889{
3890 if (fn == NULL) {
3891 return false;
3892 }
3893 if (sve_access_check(s)) {
3894 unsigned vsz = vec_full_reg_size(s);
3895 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897 vec_full_reg_offset(s, a->rn),
3898 vec_full_reg_offset(s, a->rm),
3899 pred_full_reg_offset(s, a->pg),
3900 status, vsz, vsz, 0, fn);
3901 tcg_temp_free_ptr(status);
3902 }
3903 return true;
3904}
3905
/* Expand the predicated FP vector-vector compares.  The NULL entry
 * for esz == 0 (byte) is rejected inside do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
                                uint32_t insn) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fp_cmp(s, a, fns[a->esz]); \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3926
76a9d9cd
RH
3927static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3928{
3929 static gen_helper_gvec_4_ptr * const fns[3] = {
3930 gen_helper_sve_fcadd_h,
3931 gen_helper_sve_fcadd_s,
3932 gen_helper_sve_fcadd_d
3933 };
3934
3935 if (a->esz == 0) {
3936 return false;
3937 }
3938 if (sve_access_check(s)) {
3939 unsigned vsz = vec_full_reg_size(s);
3940 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942 vec_full_reg_offset(s, a->rn),
3943 vec_full_reg_offset(s, a->rm),
3944 pred_full_reg_offset(s, a->pg),
3945 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3946 tcg_temp_free_ptr(status);
3947 }
3948 return true;
3949}
3950
6ceabaad
RH
3951typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
/* Expand a predicated FP multiply-add (three vector sources plus a
 * destination and predicate) through a helper that takes only
 * (env, pg, desc).  A NULL fn rejects the encoding.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor:
     * rd in bits [4:0], rn [9:5], rm [14:10], ra [19:15].
     */
    desc = deposit32(a->rd, 5, 5, a->rn);
    desc = deposit32(desc, 10, 5, a->rm);
    desc = deposit32(desc, 15, 5, a->ra);
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
3982
/* Expand the four predicated FP multiply-add variants.  The NULL
 * entry for esz == 0 (byte) is rejected inside do_fmla.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{ \
    static gen_helper_sve_fmla * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fmla(s, a, fns[a->esz]); \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
3999
05f48bab
RH
/* FCMLA (vectors): predicated complex multiply-add.  Like do_fmla,
 * all register numbers (plus the rotation) are packed into the
 * descriptor data for the helper.  Byte elements are unallocated.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s,
                              arg_FCMLA_zpzzz *a, uint32_t insn)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor:
         * rd [4:0], rn [9:5], rm [14:10], ra [19:15], rot [21:20].
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        /* Sign-extend to 22 bits; presumably this matches the width of
         * the simd_desc data field -- confirm against SIMD_DATA_BITS.
         */
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
4036
18fc2405
RH
/* FCMLA (indexed): complex multiply-add with an indexed element of Zm.
 * Only half and single element sizes are decoded, and the destination
 * must equal the addend register (both enforced by the asserts).
 * Index and rotation are packed into the descriptor data as
 * index * 4 + rot (rot in the low two bits).
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4059
8092c6a3
RH
4060/*
4061 *** SVE Floating Point Unary Operations Predicated Group
4062 */
4063
4064static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4065 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4066{
4067 if (sve_access_check(s)) {
4068 unsigned vsz = vec_full_reg_size(s);
4069 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4070 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4071 vec_full_reg_offset(s, rn),
4072 pred_full_reg_offset(s, pg),
4073 status, vsz, vsz, 0, fn);
4074 tcg_temp_free_ptr(status);
4075 }
4076 return true;
4077}
4078
46d33d1e
RH
/* FCVT: floating-point precision conversions between half, single and
 * double.  NOTE(review): the _sh and _dh forms use the FP16 fpstatus
 * (is_fp16 == true) while the others do not; presumably the suffix is
 * <src><dst> and the FP16 status applies when the destination is
 * half-precision -- confirm against the helper definitions.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4108
df4de1af
RH
/* FCVTZS/FCVTZU: convert floating-point to signed/unsigned integer,
 * rounding toward zero.  All forms with a half-precision FP operand
 * (the h* suffixes) use the FP16 fpstatus; the rest use the standard
 * one.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4178
cda3c753
RH
/* Helper table shared by FRINTI and the explicit-rounding-mode
 * variants below; indexed by esz - 1 (half, single, double).
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current rounding mode.  */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    /* Byte elements are unallocated.  */
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4193
4194static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4195{
4196 static gen_helper_gvec_3_ptr * const fns[3] = {
4197 gen_helper_sve_frintx_h,
4198 gen_helper_sve_frintx_s,
4199 gen_helper_sve_frintx_d
4200 };
4201 if (a->esz == 0) {
4202 return false;
4203 }
4204 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4205}
4206
/* Expand FRINT* with an explicit rounding mode.  gen_helper_set_rmode
 * installs MODE and leaves the previous rounding mode in tmode, so the
 * second call after the operation restores the original mode.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    /* Byte elements are unallocated.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* Restore the saved rounding mode.  */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4230
/* FRINTN: round to nearest, ties to even.  */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

/* FRINTP: round toward plus infinity.  */
static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_up);
}

/* FRINTM: round toward minus infinity.  */
static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_down);
}

/* FRINTZ: round toward zero.  */
static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

/* FRINTA: round to nearest, ties away from zero.  */
static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
4255
ec5b375b
RH
4256static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4257{
4258 static gen_helper_gvec_3_ptr * const fns[3] = {
4259 gen_helper_sve_frecpx_h,
4260 gen_helper_sve_frecpx_s,
4261 gen_helper_sve_frecpx_d
4262 };
4263 if (a->esz == 0) {
4264 return false;
4265 }
4266 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4267}
4268
4269static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4270{
4271 static gen_helper_gvec_3_ptr * const fns[3] = {
4272 gen_helper_sve_fsqrt_h,
4273 gen_helper_sve_fsqrt_s,
4274 gen_helper_sve_fsqrt_d
4275 };
4276 if (a->esz == 0) {
4277 return false;
4278 }
4279 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4280}
4281
8092c6a3
RH
/* SCVTF/UCVTF: convert signed/unsigned integer to floating-point.
 * The forms passing true (_hh, _sh, _dh) use the FP16 fpstatus;
 * presumably the second suffix letter is the FP destination size,
 * so these are the half-precision results -- confirm against the
 * helper definitions.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4351
d1822297
RH
4352/*
4353 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4354 */
4355
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    /* len_align: the multiple-of-8 prefix loaded as 64-bit units;
     * len_remain: the 0/2/4/6-byte tail.  nparts counts total memory
     * ops: one per 8-byte unit plus one per set bit of the remainder
     * (a 6-byte tail takes a 4-byte and a 2-byte access).
     */
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: fully unroll the 8-byte loads.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Large transfer: emit a TCG-level loop over the counter I,
         * which must be a local temp to survive the branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to MO_16/MO_32/MO_64.
             * NOTE(review): case 8 is unreachable since len_remain < 8.
             */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load plus a 2-byte load, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4444
5047c204
RH
/* Similarly for stores.  Store LEN bytes from cpu_env offset VOFS
 * to memory at (cpu_reg_sp(RN) + IMM), as a little-endian byte stream.
 */
static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Number of memory operations: one per aligned 8-byte unit plus one
     * per set bit of the remainder; used only as an unrolling heuristic.
     */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Few parts: unroll the aligned 8-byte stores. */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Many parts: emit a TCG-level loop over the aligned portion. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Single power-of-two sized store. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Split 6 bytes into a 4-byte plus a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4527
d1822297
RH
4528static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4529{
4530 if (sve_access_check(s)) {
4531 int size = vec_full_reg_size(s);
4532 int off = vec_full_reg_offset(s, a->rd);
4533 do_ldr(s, off, size, a->rn, a->imm * size);
4534 }
4535 return true;
4536}
4537
4538static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4539{
4540 if (sve_access_check(s)) {
4541 int size = pred_full_reg_size(s);
4542 int off = pred_full_reg_offset(s, a->rd);
4543 do_ldr(s, off, size, a->rn, a->imm * size);
4544 }
4545 return true;
4546}
c4e7c493 4547
5047c204
RH
4548static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4549{
4550 if (sve_access_check(s)) {
4551 int size = vec_full_reg_size(s);
4552 int off = vec_full_reg_offset(s, a->rd);
4553 do_str(s, off, size, a->rn, a->imm * size);
4554 }
4555 return true;
4556}
4557
4558static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4559{
4560 if (sve_access_check(s)) {
4561 int size = pred_full_reg_size(s);
4562 int off = pred_full_reg_offset(s, a->rd);
4563 do_str(s, off, size, a->rn, a->imm * size);
4564 }
4565 return true;
4566}
4567
c4e7c493
RH
4568/*
4569 *** SVE Memory - Contiguous Load Group
4570 */
4571
/* The memory mode of the dtype: the TCGMemOp (size + sign) used for the
 * actual memory access, indexed by the 4-bit dtype field of the insn.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The log2 size, in bytes, of the memory access for the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4581
/* The vector element size (log2 bytes) of dtype, i.e. the size of the
 * destination element after any widening/sign-extension.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4589
/* Invoke a predicated contiguous load/store helper FN:
 * ZT is the (first) vector register number, PG the governing predicate,
 * ADDR the already-computed base address.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
4610
/* Emit a contiguous predicated load: dispatch on DTYPE and the number of
 * registers (NREG = 0 for LD1, 1..3 for LD2..LD4).
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    /* Indexed by [dtype][nreg]; multi-register forms exist only when the
     * memory size equals the element size (the diagonal entries).
     */
    static gen_helper_gvec_mem * const fns[16][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
          gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
        { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
          gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
        { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
          gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
    };
    gen_helper_gvec_mem *fn = fns[dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4647
4648static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4649{
4650 if (a->rm == 31) {
4651 return false;
4652 }
4653 if (sve_access_check(s)) {
4654 TCGv_i64 addr = new_tmp_a64(s);
4655 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4656 (a->nreg + 1) << dtype_msz(a->dtype));
4657 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4658 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4659 }
4660 return true;
4661}
4662
4663static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4664{
4665 if (sve_access_check(s)) {
4666 int vsz = vec_full_reg_size(s);
4667 int elements = vsz >> dtype_esz[a->dtype];
4668 TCGv_i64 addr = new_tmp_a64(s);
4669
4670 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4671 (a->imm * elements * (a->nreg + 1))
4672 << dtype_msz(a->dtype));
4673 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4674 }
4675 return true;
4676}
e2654d75
RH
4677
/* LDFF1 (first-fault, scalar plus scalar). */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    /* Indexed by dtype; same layout as the do_ld_zpa table. */
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldff1bb_r,
        gen_helper_sve_ldff1bhu_r,
        gen_helper_sve_ldff1bsu_r,
        gen_helper_sve_ldff1bdu_r,

        gen_helper_sve_ldff1sds_r,
        gen_helper_sve_ldff1hh_r,
        gen_helper_sve_ldff1hsu_r,
        gen_helper_sve_ldff1hdu_r,

        gen_helper_sve_ldff1hds_r,
        gen_helper_sve_ldff1hss_r,
        gen_helper_sve_ldff1ss_r,
        gen_helper_sve_ldff1sdu_r,

        gen_helper_sve_ldff1bds_r,
        gen_helper_sve_ldff1bss_r,
        gen_helper_sve_ldff1bhs_r,
        gen_helper_sve_ldff1dd_r,
    };

    if (sve_access_check(s)) {
        /* addr = Xn + (Xm << msz). */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
4710
/* LDNF1 (non-fault, scalar plus immediate). */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    /* Indexed by dtype; same layout as the do_ld_zpa table. */
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldnf1bb_r,
        gen_helper_sve_ldnf1bhu_r,
        gen_helper_sve_ldnf1bsu_r,
        gen_helper_sve_ldnf1bdu_r,

        gen_helper_sve_ldnf1sds_r,
        gen_helper_sve_ldnf1hh_r,
        gen_helper_sve_ldnf1hsu_r,
        gen_helper_sve_ldnf1hdu_r,

        gen_helper_sve_ldnf1hds_r,
        gen_helper_sve_ldnf1hss_r,
        gen_helper_sve_ldnf1ss_r,
        gen_helper_sve_ldnf1sdu_r,

        gen_helper_sve_ldnf1bds_r,
        gen_helper_sve_ldnf1bss_r,
        gen_helper_sve_ldnf1bhs_r,
        gen_helper_sve_ldnf1dd_r,
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* Immediate is scaled by the number of elements times access size. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
1a039c7e 4746
05abe304
RH
/* Load one quadword at ADDR under predicate PG, then replicate that
 * quadword across the whole of vector register ZT (LD1RQ).
 * MSZ is the log2 element size, selecting the byte/half/word/dword helper.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[4] = {
        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* Load the first quadword using the normal predicated load helpers.
     * Note the descriptor claims a 16-byte vector, so only 16 bytes load.
     */
    desc = tcg_const_i32(simd_desc(16, 16, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fns[msz](cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4773
4774static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4775{
4776 if (a->rm == 31) {
4777 return false;
4778 }
4779 if (sve_access_check(s)) {
4780 int msz = dtype_msz(a->dtype);
4781 TCGv_i64 addr = new_tmp_a64(s);
4782 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4783 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4784 do_ldrq(s, a->rd, a->pg, addr, msz);
4785 }
4786 return true;
4787}
4788
4789static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4790{
4791 if (sve_access_check(s)) {
4792 TCGv_i64 addr = new_tmp_a64(s);
4793 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4794 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4795 }
4796 return true;
4797}
4798
68459864
RH
/* Load and broadcast element (LD1R): load one element, replicate it to
 * every element of ZD, then zero the elements inactive in PG.  If no
 * predicate bit is set, skip the load entirely (no fault may be taken).
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        /* Branch over the load/broadcast when no element is active. */
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: use the helper-backed search;
         * a negative result means no active element.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4846
1a039c7e
RH
/* Emit a contiguous predicated store: dispatch on memory size MSZ,
 * element size ESZ, and register count (NREG = 0 for ST1, 1..3 for
 * ST2..ST4).
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [msz][esz]; msz <= esz always. */
    static gen_helper_gvec_mem * const fn_single[4][4] = {
        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
        { NULL, gen_helper_sve_st1hh_r,
          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
        { NULL, NULL,
          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
    };
    /* ST2..ST4 helpers, indexed by [nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
    };
    gen_helper_gvec_mem *fn;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4880
4881static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4882{
4883 if (a->rm == 31 || a->msz > a->esz) {
4884 return false;
4885 }
4886 if (sve_access_check(s)) {
4887 TCGv_i64 addr = new_tmp_a64(s);
4888 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4889 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4890 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4891 }
4892 return true;
4893}
4894
4895static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4896{
4897 if (a->msz > a->esz) {
4898 return false;
4899 }
4900 if (sve_access_check(s)) {
4901 int vsz = vec_full_reg_size(s);
4902 int elements = vsz >> a->esz;
4903 TCGv_i64 addr = new_tmp_a64(s);
4904
4905 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4906 (a->imm * elements * (a->nreg + 1)) << a->msz);
4907 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4908 }
4909 return true;
4910}
f6dbf62a
RH
4911
4912/*
4913 *** SVE gather loads / scatter stores
4914 */
4915
/* Invoke a gather-load / scatter-store helper FN:
 * ZT is the data vector, PG the governing predicate, ZM the vector of
 * offsets, SCALAR the base address (or immediate offset), and SCALE the
 * log2 scaling applied to the offsets (passed via the descriptor).
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
                       TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
4935
673e9fa6
RH
/* 32-bit element gather-load helpers.
 * Indexed by [ff][xs][u][msz]:
 *   ff = first-fault, xs = sign-extended offsets, u = unsigned data,
 *   msz = log2 memory access size.  NULL entries are unreachable
 *   encodings (sign-extending a same-sized load).
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
    { { { gen_helper_sve_ldbss_zsu,
          gen_helper_sve_ldhss_zsu,
          NULL, },
        { gen_helper_sve_ldbsu_zsu,
          gen_helper_sve_ldhsu_zsu,
          gen_helper_sve_ldssu_zsu, } },
      { { gen_helper_sve_ldbss_zss,
          gen_helper_sve_ldhss_zss,
          NULL, },
        { gen_helper_sve_ldbsu_zss,
          gen_helper_sve_ldhsu_zss,
          gen_helper_sve_ldssu_zss, } } },

    /* First-fault variants. */
    { { { gen_helper_sve_ldffbss_zsu,
          gen_helper_sve_ldffhss_zsu,
          NULL, },
        { gen_helper_sve_ldffbsu_zsu,
          gen_helper_sve_ldffhsu_zsu,
          gen_helper_sve_ldffssu_zsu, } },
      { { gen_helper_sve_ldffbss_zss,
          gen_helper_sve_ldffhss_zss,
          NULL, },
        { gen_helper_sve_ldffbsu_zss,
          gen_helper_sve_ldffhsu_zss,
          gen_helper_sve_ldffssu_zss, } } }
};
4964
/* 64-bit element gather-load helpers, indexed by [ff][xs][u][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
    { { { gen_helper_sve_ldbds_zsu,
          gen_helper_sve_ldhds_zsu,
          gen_helper_sve_ldsds_zsu,
          NULL, },
        { gen_helper_sve_ldbdu_zsu,
          gen_helper_sve_ldhdu_zsu,
          gen_helper_sve_ldsdu_zsu,
          gen_helper_sve_ldddu_zsu, } },
      { { gen_helper_sve_ldbds_zss,
          gen_helper_sve_ldhds_zss,
          gen_helper_sve_ldsds_zss,
          NULL, },
        { gen_helper_sve_ldbdu_zss,
          gen_helper_sve_ldhdu_zss,
          gen_helper_sve_ldsdu_zss,
          gen_helper_sve_ldddu_zss, } },
      { { gen_helper_sve_ldbds_zd,
          gen_helper_sve_ldhds_zd,
          gen_helper_sve_ldsds_zd,
          NULL, },
        { gen_helper_sve_ldbdu_zd,
          gen_helper_sve_ldhdu_zd,
          gen_helper_sve_ldsdu_zd,
          gen_helper_sve_ldddu_zd, } } },

    /* First-fault variants. */
    { { { gen_helper_sve_ldffbds_zsu,
          gen_helper_sve_ldffhds_zsu,
          gen_helper_sve_ldffsds_zsu,
          NULL, },
        { gen_helper_sve_ldffbdu_zsu,
          gen_helper_sve_ldffhdu_zsu,
          gen_helper_sve_ldffsdu_zsu,
          gen_helper_sve_ldffddu_zsu, } },
      { { gen_helper_sve_ldffbds_zss,
          gen_helper_sve_ldffhds_zss,
          gen_helper_sve_ldffsds_zss,
          NULL, },
        { gen_helper_sve_ldffbdu_zss,
          gen_helper_sve_ldffhdu_zss,
          gen_helper_sve_ldffsdu_zss,
          gen_helper_sve_ldffddu_zss, } },
      { { gen_helper_sve_ldffbds_zd,
          gen_helper_sve_ldffhds_zd,
          gen_helper_sve_ldffsds_zd,
          NULL, },
        { gen_helper_sve_ldffbdu_zd,
          gen_helper_sve_ldffhdu_zd,
          gen_helper_sve_ldffsdu_zd,
          gen_helper_sve_ldffddu_zd, } } }
};
5017
5018static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5019{
5020 gen_helper_gvec_mem_scatter *fn = NULL;
5021
5022 if (!sve_access_check(s)) {
5023 return true;
5024 }
5025
5026 switch (a->esz) {
5027 case MO_32:
5028 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5029 break;
5030 case MO_64:
5031 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5032 break;
5033 }
5034 assert(fn != NULL);
5035
5036 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5037 cpu_reg_sp(s, a->rn), fn);
5038 return true;
5039}
5040
/* LD1 gather (vector base plus immediate offset). */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    TCGv_i64 imm;

    /* Reject widening/sign combinations the encoding does not allow. */
    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Vector-plus-immediate uses unscaled offsets: xs=0 for 32-bit
     * elements, the overloaded xs=2 (64-bit offsets) for 64-bit.
     */
    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
    tcg_temp_free_i64(imm);
    return true;
}
5071
408ecde9
RH
/* 32-bit element scatter-store helpers.
 * Indexed by [xs][msz]: xs = sign-extended offsets, msz = log2 access size.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
    { gen_helper_sve_stbs_zsu,
      gen_helper_sve_sths_zsu,
      gen_helper_sve_stss_zsu, },
    { gen_helper_sve_stbs_zss,
      gen_helper_sve_sths_zss,
      gen_helper_sve_stss_zss, },
};
5081
/* 64-bit element scatter-store helpers, indexed by [xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
    { gen_helper_sve_stbd_zsu,
      gen_helper_sve_sthd_zsu,
      gen_helper_sve_stsd_zsu,
      gen_helper_sve_stdd_zsu, },
    { gen_helper_sve_stbd_zss,
      gen_helper_sve_sthd_zss,
      gen_helper_sve_stsd_zss,
      gen_helper_sve_stdd_zss, },
    { gen_helper_sve_stbd_zd,
      gen_helper_sve_sthd_zd,
      gen_helper_sve_stsd_zd,
      gen_helper_sve_stdd_zd, },
};
5097
f6dbf62a
RH
/* ST1 scatter (scalar base plus vector offsets). */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn;

    /* Reject narrowing stores and scaled byte accesses. */
    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), fn);
    return true;
}
dec6cf6b 5122
408ecde9
RH
/* ST1 scatter (vector base plus immediate offset). */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    TCGv_i64 imm;

    /* Narrowing stores are not encodable. */
    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Vector-plus-immediate uses unscaled offsets: xs=0 for 32-bit
     * elements, the overloaded xs=2 (64-bit offsets) for 64-bit.
     */
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
    tcg_temp_free_i64(imm);
    return true;
}
5153
dec6cf6b
RH
/*
 *** Prefetches
 */

/* PRF (scalar plus immediate / vector forms decoded to arg_PRF). */
static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
{
    /* Prefetch is a nop within QEMU.  Still perform the SVE access
     * check so that any trap it raises is taken.
     */
    (void)sve_access_check(s);
    return true;
}
5164
/* PRF (scalar plus scalar). */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
{
    /* rm == 31 is an unallocated encoding. */
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU; still take any SVE access trap. */
    (void)sve_access_check(s);
    return true;
}
a2103582
RH
5174
/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

/* MOVPRFX (unpredicated): implemented as a plain vector move. */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
{
    return do_mov_z(s, a->rd, a->rn);
}
5193
5194static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5195{
5196 if (sve_access_check(s)) {
5197 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5198 }
5199 return true;
5200}
5201
5202static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5203{
5204 if (sve_access_check(s)) {
5205 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5206 }
5207 return true;
5208}