]> git.proxmox.com Git - mirror_qemu.git/blame - target/arm/translate-sve.c
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
[mirror_qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg-op.h"
24#include "tcg-op-gvec.h"
028e2a7b 25#include "tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
cc48affe 35#include "fpu/softfloat.h"
38388f7e 36
757f9cff 37
9ee3a611
RH
38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
38cadeba
RH
41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
757f9cff
RH
43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
c4e7c493 46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
f6dbf62a
RH
47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
c4e7c493 49
ccd841c3
RH
50/*
51 * Helpers for extracting complex instruction fields.
52 */
53
/* See e.g. ASR (immediate, predicated).
 * The element size is the index of the most significant set bit of
 * tsz (the encoding with imm3 discarded); clz32(0) == 32 makes an
 * all-zero tsz yield -1, i.e. an unallocated encoding to be
 * diagnosed later by the caller.
 */
static int tszimm_esz(int x)
{
    int tsz = x >> 3;   /* discard imm3 */
    return 31 - clz32(tsz);
}
62
/* See e.g. ASR (immediate, predicated): shift = (2 * esize) - tszimm. */
static int tszimm_shr(int x)
{
    int esize_x2 = 16 << tszimm_esz(x);
    return esize_x2 - x;
}
67
/* See e.g. LSL (immediate, predicated): shift = tszimm - esize. */
static int tszimm_shl(int x)
{
    int esize = 8 << tszimm_esz(x);
    return x - esize;
}
73
24e82e68
RH
/* Decode helper: the immediate field encodes (value - 1). */
static inline int plus1(int x)
{
    return 1 + x;
}
78
f25a2361
RH
/* The SH bit is in bit 8.  Extract the low 8 bits as a signed byte
 * and, if SH is set, scale by 256.  Multiplication is used instead
 * of << because left-shifting a negative value is undefined behavior
 * in C; the result is identical for all inputs.
 */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x * (x & 0x100 ? 256 : 1);
}
84
6e6a157d
RH
/* As expand_imm_sh8s, but with an unsigned 8-bit payload:
 * bit 8 (SH) selects whether the low byte is shifted up by 8.
 */
static inline int expand_imm_sh8u(int x)
{
    int lo = x & 0xff;
    return (x & 0x100) ? lo << 8 : lo;
}
89
c4e7c493
RH
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t msz_to_dtype[4] = { 0, 5, 10, 15 };
    return msz_to_dtype[msz];
}
98
38388f7e
RH
99/*
100 * Include the generated decoder.
101 */
102
103#include "decode-sve.inc.c"
104
105/*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
116
/* Return the byte size of the whole predicate register, VL / 64.
 * (One predicate bit per 8 bits of vector length.)
 */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
122
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure: a minimum of 8 bytes, otherwise
 * the next multiple of 16.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    return size <= 8 ? 8 : (size + 15) & ~15;
}
139
/* Size of the current predicate register, rounded up for gvec use. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
144
39eea561
RH
145/* Invoke a vector expander on two Zregs. */
146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
147 int esz, int rd, int rn)
38388f7e 148{
39eea561
RH
149 if (sve_access_check(s)) {
150 unsigned vsz = vec_full_reg_size(s);
151 gvec_fn(esz, vec_full_reg_offset(s, rd),
152 vec_full_reg_offset(s, rn), vsz, vsz);
153 }
154 return true;
38388f7e
RH
155}
156
39eea561
RH
157/* Invoke a vector expander on three Zregs. */
158static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
159 int esz, int rd, int rn, int rm)
38388f7e 160{
39eea561
RH
161 if (sve_access_check(s)) {
162 unsigned vsz = vec_full_reg_size(s);
163 gvec_fn(esz, vec_full_reg_offset(s, rd),
164 vec_full_reg_offset(s, rn),
165 vec_full_reg_offset(s, rm), vsz, vsz);
166 }
167 return true;
38388f7e
RH
168}
169
39eea561
RH
/* Invoke a vector move on two Zregs.  Element size is irrelevant
 * for a plain copy, so esz 0 is used.
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}
175
d9d78dcc
RH
/* Initialize a Zreg with replications of a 64-bit immediate.
 * NOTE(review): no sve_access_check here — callers (e.g. do_shift_imm)
 * perform the check before calling.
 */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
182
516e246a
RH
183/* Invoke a vector expander on two Pregs. */
184static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
185 int esz, int rd, int rn)
186{
187 if (sve_access_check(s)) {
188 unsigned psz = pred_gvec_reg_size(s);
189 gvec_fn(esz, pred_full_reg_offset(s, rd),
190 pred_full_reg_offset(s, rn), psz, psz);
191 }
192 return true;
193}
194
195/* Invoke a vector expander on three Pregs. */
196static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
197 int esz, int rd, int rn, int rm)
198{
199 if (sve_access_check(s)) {
200 unsigned psz = pred_gvec_reg_size(s);
201 gvec_fn(esz, pred_full_reg_offset(s, rd),
202 pred_full_reg_offset(s, rn),
203 pred_full_reg_offset(s, rm), psz, psz);
204 }
205 return true;
206}
207
208/* Invoke a vector operation on four Pregs. */
209static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
210 int rd, int rn, int rm, int rg)
211{
212 if (sve_access_check(s)) {
213 unsigned psz = pred_gvec_reg_size(s);
214 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
215 pred_full_reg_offset(s, rn),
216 pred_full_reg_offset(s, rm),
217 pred_full_reg_offset(s, rg),
218 psz, psz, gvec_op);
219 }
220 return true;
221}
222
/* Invoke a vector move on two Pregs (esz 0: plain byte copy). */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
228
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs the result so that: N takes the whole value,
 * Z comes from bit 1, C from bit 0, and V is cleared.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
237
/* Subroutines computing the ARM PredTest pseudofunction.
 * Single-word variant: d is the predicate value, g the governing
 * predicate; the helper's packed result sets NZCV via do_pred_flags.
 */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
247
/* PredTest over WORDS predicate words stored at CPUARMState offsets
 * dofs (value) and gofs (governing predicate), setting NZCV.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count in, and the packed flag result out.  */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
265
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active:
 * every bit for B, every 2nd bit for H, every 4th for S, every 8th for D.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
271
39eea561
RH
272/*
273 *** SVE Logical - Unpredicated Group
274 */
275
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV is an alias for ORR Zd, Zn, Zn */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
d1822297 299
fea98f9c
RH
300/*
301 *** SVE Integer Arithmetic - Unpredicated Group
302 */
303
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* Signed saturating add. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

/* Signed saturating subtract. */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

/* Unsigned saturating add. */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

/* Unsigned saturating subtract. */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
333
f97cfd59
RH
334/*
335 *** SVE Integer Arithmetic - Binary Predicated Group
336 */
337
338static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
339{
340 unsigned vsz = vec_full_reg_size(s);
341 if (fn == NULL) {
342 return false;
343 }
344 if (sve_access_check(s)) {
345 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
346 vec_full_reg_offset(s, a->rn),
347 vec_full_reg_offset(s, a->rm),
348 pred_full_reg_offset(s, a->pg),
349 vsz, vsz, 0, fn);
350 }
351 return true;
352}
353
a2103582
RH
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 * NOTE(review): no sve_access_check here — callers perform it
 * (see trans_SEL_zpzz).
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
370
f97cfd59
RH
371#define DO_ZPZZ(NAME, name) \
372static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
373 uint32_t insn) \
374{ \
375 static gen_helper_gvec_4 * const fns[4] = { \
376 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
377 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
378 }; \
379 return do_zpzz_ool(s, a, fns[a->esz]); \
380}
381
382DO_ZPZZ(AND, and)
383DO_ZPZZ(EOR, eor)
384DO_ZPZZ(ORR, orr)
385DO_ZPZZ(BIC, bic)
386
387DO_ZPZZ(ADD, add)
388DO_ZPZZ(SUB, sub)
389
390DO_ZPZZ(SMAX, smax)
391DO_ZPZZ(UMAX, umax)
392DO_ZPZZ(SMIN, smin)
393DO_ZPZZ(UMIN, umin)
394DO_ZPZZ(SABD, sabd)
395DO_ZPZZ(UABD, uabd)
396
397DO_ZPZZ(MUL, mul)
398DO_ZPZZ(SMULH, smulh)
399DO_ZPZZ(UMULH, umulh)
400
27721dbb
RH
401DO_ZPZZ(ASR, asr)
402DO_ZPZZ(LSR, lsr)
403DO_ZPZZ(LSL, lsl)
404
f97cfd59
RH
/* SVE divide exists only for 32-bit and 64-bit elements; the NULL
 * entries make do_zpzz_ool reject the 8/16-bit encodings.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}
d3fe4a29 428
f97cfd59
RH
429#undef DO_ZPZZ
430
afac6d04
RH
431/*
432 *** SVE Integer Arithmetic - Unary Predicated Group
433 */
434
435static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
436{
437 if (fn == NULL) {
438 return false;
439 }
440 if (sve_access_check(s)) {
441 unsigned vsz = vec_full_reg_size(s);
442 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
443 vec_full_reg_offset(s, a->rn),
444 pred_full_reg_offset(s, a->pg),
445 vsz, vsz, 0, fn);
446 }
447 return true;
448}
449
450#define DO_ZPZ(NAME, name) \
451static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
452{ \
453 static gen_helper_gvec_3 * const fns[4] = { \
454 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
455 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
456 }; \
457 return do_zpz_ool(s, a, fns[a->esz]); \
458}
459
460DO_ZPZ(CLS, cls)
461DO_ZPZ(CLZ, clz)
462DO_ZPZ(CNT_zpz, cnt_zpz)
463DO_ZPZ(CNOT, cnot)
464DO_ZPZ(NOT_zpz, not_zpz)
465DO_ZPZ(ABS, abs)
466DO_ZPZ(NEG, neg)
467
468static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
469{
470 static gen_helper_gvec_3 * const fns[4] = {
471 NULL,
472 gen_helper_sve_fabs_h,
473 gen_helper_sve_fabs_s,
474 gen_helper_sve_fabs_d
475 };
476 return do_zpz_ool(s, a, fns[a->esz]);
477}
478
479static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
480{
481 static gen_helper_gvec_3 * const fns[4] = {
482 NULL,
483 gen_helper_sve_fneg_h,
484 gen_helper_sve_fneg_s,
485 gen_helper_sve_fneg_d
486 };
487 return do_zpz_ool(s, a, fns[a->esz]);
488}
489
490static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
491{
492 static gen_helper_gvec_3 * const fns[4] = {
493 NULL,
494 gen_helper_sve_sxtb_h,
495 gen_helper_sve_sxtb_s,
496 gen_helper_sve_sxtb_d
497 };
498 return do_zpz_ool(s, a, fns[a->esz]);
499}
500
501static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
502{
503 static gen_helper_gvec_3 * const fns[4] = {
504 NULL,
505 gen_helper_sve_uxtb_h,
506 gen_helper_sve_uxtb_s,
507 gen_helper_sve_uxtb_d
508 };
509 return do_zpz_ool(s, a, fns[a->esz]);
510}
511
512static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
513{
514 static gen_helper_gvec_3 * const fns[4] = {
515 NULL, NULL,
516 gen_helper_sve_sxth_s,
517 gen_helper_sve_sxth_d
518 };
519 return do_zpz_ool(s, a, fns[a->esz]);
520}
521
522static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
523{
524 static gen_helper_gvec_3 * const fns[4] = {
525 NULL, NULL,
526 gen_helper_sve_uxth_s,
527 gen_helper_sve_uxth_d
528 };
529 return do_zpz_ool(s, a, fns[a->esz]);
530}
531
532static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
533{
534 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
535}
536
537static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
538{
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
540}
541
542#undef DO_ZPZ
543
047cec97
RH
544/*
545 *** SVE Integer Reduction Group
546 */
547
/* Signature of an out-of-line predicated reduction helper:
 * (result, Zn pointer, Pg pointer, simd descriptor).
 */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Expand a reduction of Zn governed by Pg, writing the 64-bit scalar
 * result to Vd via write_fp_dreg.  Returns false for an unallocated
 * encoding (fn == NULL).
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
580
581#define DO_VPZ(NAME, name) \
582static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
583{ \
584 static gen_helper_gvec_reduc * const fns[4] = { \
585 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
586 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
587 }; \
588 return do_vpz_ool(s, a, fns[a->esz]); \
589}
590
591DO_VPZ(ORV, orv)
592DO_VPZ(ANDV, andv)
593DO_VPZ(EORV, eorv)
594
595DO_VPZ(UADDV, uaddv)
596DO_VPZ(SMAXV, smaxv)
597DO_VPZ(UMAXV, umaxv)
598DO_VPZ(SMINV, sminv)
599DO_VPZ(UMINV, uminv)
600
601static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
602{
603 static gen_helper_gvec_reduc * const fns[4] = {
604 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
605 gen_helper_sve_saddv_s, NULL
606 };
607 return do_vpz_ool(s, a, fns[a->esz]);
608}
609
610#undef DO_VPZ
611
ccd841c3
RH
612/*
613 *** SVE Shift by Immediate - Predicated Group
614 */
615
616/* Store zero into every active element of Zd. We will use this for two
617 * and three-operand predicated instructions for which logic dictates a
618 * zero result.
619 */
620static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
621{
622 static gen_helper_gvec_2 * const fns[4] = {
623 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
624 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
625 };
626 if (sve_access_check(s)) {
627 unsigned vsz = vec_full_reg_size(s);
628 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
629 pred_full_reg_offset(s, pg),
630 vsz, vsz, 0, fns[esz]);
631 }
632 return true;
633}
634
68459864
RH
/* Copy Zn into Zd, storing zeros into inactive elements.
 * NOTE(review): no sve_access_check here — callers perform it.
 */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
648
ccd841c3
RH
649static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
650 gen_helper_gvec_3 *fn)
651{
652 if (sve_access_check(s)) {
653 unsigned vsz = vec_full_reg_size(s);
654 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
655 vec_full_reg_offset(s, a->rn),
656 pred_full_reg_offset(s, a->pg),
657 vsz, vsz, a->imm, fn);
658 }
659 return true;
660}
661
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
677
678static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
679{
680 static gen_helper_gvec_3 * const fns[4] = {
681 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
682 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
683 };
684 if (a->esz < 0) {
685 return false;
686 }
687 /* Shift by element size is architecturally valid.
688 For logical shifts, it is a zeroing operation. */
689 if (a->imm >= (8 << a->esz)) {
690 return do_clr_zp(s, a->rd, a->pg, a->esz);
691 } else {
692 return do_zpzi_ool(s, a, fns[a->esz]);
693 }
694}
695
696static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
697{
698 static gen_helper_gvec_3 * const fns[4] = {
699 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
700 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
701 };
702 if (a->esz < 0) {
703 return false;
704 }
705 /* Shift by element size is architecturally valid.
706 For logical shifts, it is a zeroing operation. */
707 if (a->imm >= (8 << a->esz)) {
708 return do_clr_zp(s, a->rd, a->pg, a->esz);
709 } else {
710 return do_zpzi_ool(s, a, fns[a->esz]);
711 }
712}
713
714static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
715{
716 static gen_helper_gvec_3 * const fns[4] = {
717 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
718 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
719 };
720 if (a->esz < 0) {
721 return false;
722 }
723 /* Shift by element size is architecturally valid. For arithmetic
724 right shift for division, it is a zeroing operation. */
725 if (a->imm >= (8 << a->esz)) {
726 return do_clr_zp(s, a->rd, a->pg, a->esz);
727 } else {
728 return do_zpzi_ool(s, a, fns[a->esz]);
729 }
730}
731
fe7f8dfb
RH
732/*
733 *** SVE Bitwise Shift - Predicated Group
734 */
735
736#define DO_ZPZW(NAME, name) \
737static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
738 uint32_t insn) \
739{ \
740 static gen_helper_gvec_4 * const fns[3] = { \
741 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
742 gen_helper_sve_##name##_zpzw_s, \
743 }; \
744 if (a->esz < 0 || a->esz >= 3) { \
745 return false; \
746 } \
747 return do_zpzz_ool(s, a, fns[a->esz]); \
748}
749
750DO_ZPZW(ASR, asr)
751DO_ZPZW(LSR, lsr)
752DO_ZPZW(LSL, lsl)
753
754#undef DO_ZPZW
755
d9d78dcc
RH
756/*
757 *** SVE Bitwise Shift - Unpredicated Group
758 */
759
/* Expand an unpredicated shift-by-immediate via a gvec expander.
 * asr selects arithmetic-right-shift clamping for over-wide counts;
 * otherwise such counts zero the destination.  Returns false for the
 * invalid tsz encoding (esz < 0).  Note a->imm may be mutated here.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
786
787static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
788{
789 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
790}
791
792static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
793{
794 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
795}
796
797static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
798{
799 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
800}
801
802static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
803{
804 if (fn == NULL) {
805 return false;
806 }
807 if (sve_access_check(s)) {
808 unsigned vsz = vec_full_reg_size(s);
809 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
810 vec_full_reg_offset(s, a->rn),
811 vec_full_reg_offset(s, a->rm),
812 vsz, vsz, 0, fn);
813 }
814 return true;
815}
816
817#define DO_ZZW(NAME, name) \
818static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
819 uint32_t insn) \
820{ \
821 static gen_helper_gvec_3 * const fns[4] = { \
822 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
823 gen_helper_sve_##name##_zzw_s, NULL \
824 }; \
825 return do_zzw_ool(s, a, fns[a->esz]); \
826}
827
828DO_ZZW(ASR, asr)
829DO_ZZW(LSR, lsr)
830DO_ZZW(LSL, lsl)
831
832#undef DO_ZZW
833
96a36e4a
RH
834/*
835 *** SVE Integer Multiply-Add Group
836 */
837
838static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
839 gen_helper_gvec_5 *fn)
840{
841 if (sve_access_check(s)) {
842 unsigned vsz = vec_full_reg_size(s);
843 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
844 vec_full_reg_offset(s, a->ra),
845 vec_full_reg_offset(s, a->rn),
846 vec_full_reg_offset(s, a->rm),
847 pred_full_reg_offset(s, a->pg),
848 vsz, vsz, 0, fn);
849 }
850 return true;
851}
852
853#define DO_ZPZZZ(NAME, name) \
854static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
855{ \
856 static gen_helper_gvec_5 * const fns[4] = { \
857 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
858 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
859 }; \
860 return do_zpzzz_ool(s, a, fns[a->esz]); \
861}
862
863DO_ZPZZZ(MLA, mla)
864DO_ZPZZZ(MLS, mls)
865
866#undef DO_ZPZZZ
867
9a56c9c3
RH
868/*
869 *** SVE Index Generation Group
870 */
871
/* Expand INDEX: fill Zd with start + i * incr per element.
 * The 64-bit helper takes the scalars directly; narrower element
 * sizes truncate both operands to 32 bits first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Narrow the scalar operands for the 8/16/32-bit helpers. */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
902
903static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
904{
905 if (sve_access_check(s)) {
906 TCGv_i64 start = tcg_const_i64(a->imm1);
907 TCGv_i64 incr = tcg_const_i64(a->imm2);
908 do_index(s, a->esz, a->rd, start, incr);
909 tcg_temp_free_i64(start);
910 tcg_temp_free_i64(incr);
911 }
912 return true;
913}
914
915static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
916{
917 if (sve_access_check(s)) {
918 TCGv_i64 start = tcg_const_i64(a->imm);
919 TCGv_i64 incr = cpu_reg(s, a->rm);
920 do_index(s, a->esz, a->rd, start, incr);
921 tcg_temp_free_i64(start);
922 }
923 return true;
924}
925
926static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
927{
928 if (sve_access_check(s)) {
929 TCGv_i64 start = cpu_reg(s, a->rn);
930 TCGv_i64 incr = tcg_const_i64(a->imm);
931 do_index(s, a->esz, a->rd, start, incr);
932 tcg_temp_free_i64(incr);
933 }
934 return true;
935}
936
937static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
938{
939 if (sve_access_check(s)) {
940 TCGv_i64 start = cpu_reg(s, a->rn);
941 TCGv_i64 incr = cpu_reg(s, a->rm);
942 do_index(s, a->esz, a->rd, start, incr);
943 }
944 return true;
945}
946
96f922cc
RH
947/*
948 *** SVE Stack Allocation Group
949 */
950
951static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
952{
953 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
954 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
955 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
956 return true;
957}
958
959static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
960{
961 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
962 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
963 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
964 return true;
965}
966
967static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
968{
969 TCGv_i64 reg = cpu_reg(s, a->rd);
970 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
971 return true;
972}
973
4b242d9c
RH
974/*
975 *** SVE Compute Vector Address Group
976 */
977
978static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
979{
980 if (sve_access_check(s)) {
981 unsigned vsz = vec_full_reg_size(s);
982 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
983 vec_full_reg_offset(s, a->rn),
984 vec_full_reg_offset(s, a->rm),
985 vsz, vsz, a->imm, fn);
986 }
987 return true;
988}
989
990static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
991{
992 return do_adr(s, a, gen_helper_sve_adr_p32);
993}
994
995static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
996{
997 return do_adr(s, a, gen_helper_sve_adr_p64);
998}
999
1000static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001{
1002 return do_adr(s, a, gen_helper_sve_adr_s32);
1003}
1004
1005static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006{
1007 return do_adr(s, a, gen_helper_sve_adr_u32);
1008}
1009
0762cd42
RH
1010/*
1011 *** SVE Integer Misc - Unpredicated Group
1012 */
1013
1014static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015{
1016 static gen_helper_gvec_2 * const fns[4] = {
1017 NULL,
1018 gen_helper_sve_fexpa_h,
1019 gen_helper_sve_fexpa_s,
1020 gen_helper_sve_fexpa_d,
1021 };
1022 if (a->esz == 0) {
1023 return false;
1024 }
1025 if (sve_access_check(s)) {
1026 unsigned vsz = vec_full_reg_size(s);
1027 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028 vec_full_reg_offset(s, a->rn),
1029 vsz, vsz, 0, fns[a->esz]);
1030 }
1031 return true;
1032}
1033
a1f233f2
RH
1034static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035{
1036 static gen_helper_gvec_3 * const fns[4] = {
1037 NULL,
1038 gen_helper_sve_ftssel_h,
1039 gen_helper_sve_ftssel_s,
1040 gen_helper_sve_ftssel_d,
1041 };
1042 if (a->esz == 0) {
1043 return false;
1044 }
1045 if (sve_access_check(s)) {
1046 unsigned vsz = vec_full_reg_size(s);
1047 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048 vec_full_reg_offset(s, a->rn),
1049 vec_full_reg_offset(s, a->rm),
1050 vsz, vsz, 0, fns[a->esz]);
1051 }
1052 return true;
1053}
1054
516e246a
RH
1055/*
1056 *** SVE Predicate Logical Operations Group
1057 */
1058
/* Expand a flag-setting predicate logical operation (the S variants),
 * computing PredTest on the result.  For an 8-byte predicate register
 * everything is done inline in i64 temps; otherwise the operation is
 * expanded with gvec and PredTest runs over the stored result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1109
1110static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111{
1112 tcg_gen_and_i64(pd, pn, pm);
1113 tcg_gen_and_i64(pd, pd, pg);
1114}
1115
1116static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117 TCGv_vec pm, TCGv_vec pg)
1118{
1119 tcg_gen_and_vec(vece, pd, pn, pm);
1120 tcg_gen_and_vec(vece, pd, pd, pg);
1121}
1122
1123static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124{
1125 static const GVecGen4 op = {
1126 .fni8 = gen_and_pg_i64,
1127 .fniv = gen_and_pg_vec,
1128 .fno = gen_helper_sve_and_pppp,
1129 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130 };
1131 if (a->s) {
1132 return do_pppp_flags(s, a, &op);
1133 } else if (a->rn == a->rm) {
1134 if (a->pg == a->rn) {
1135 return do_mov_p(s, a->rd, a->rn);
1136 } else {
1137 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138 }
1139 } else if (a->pg == a->rn || a->pg == a->rm) {
1140 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141 } else {
1142 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143 }
1144}
1145
1146static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147{
1148 tcg_gen_andc_i64(pd, pn, pm);
1149 tcg_gen_and_i64(pd, pd, pg);
1150}
1151
1152static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153 TCGv_vec pm, TCGv_vec pg)
1154{
1155 tcg_gen_andc_vec(vece, pd, pn, pm);
1156 tcg_gen_and_vec(vece, pd, pd, pg);
1157}
1158
1159static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160{
1161 static const GVecGen4 op = {
1162 .fni8 = gen_bic_pg_i64,
1163 .fniv = gen_bic_pg_vec,
1164 .fno = gen_helper_sve_bic_pppp,
1165 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166 };
1167 if (a->s) {
1168 return do_pppp_flags(s, a, &op);
1169 } else if (a->pg == a->rn) {
1170 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171 } else {
1172 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173 }
1174}
1175
1176static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177{
1178 tcg_gen_xor_i64(pd, pn, pm);
1179 tcg_gen_and_i64(pd, pd, pg);
1180}
1181
1182static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183 TCGv_vec pm, TCGv_vec pg)
1184{
1185 tcg_gen_xor_vec(vece, pd, pn, pm);
1186 tcg_gen_and_vec(vece, pd, pd, pg);
1187}
1188
1189static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190{
1191 static const GVecGen4 op = {
1192 .fni8 = gen_eor_pg_i64,
1193 .fniv = gen_eor_pg_vec,
1194 .fno = gen_helper_sve_eor_pppp,
1195 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196 };
1197 if (a->s) {
1198 return do_pppp_flags(s, a, &op);
1199 } else {
1200 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201 }
1202}
1203
1204static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205{
1206 tcg_gen_and_i64(pn, pn, pg);
1207 tcg_gen_andc_i64(pm, pm, pg);
1208 tcg_gen_or_i64(pd, pn, pm);
1209}
1210
1211static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212 TCGv_vec pm, TCGv_vec pg)
1213{
1214 tcg_gen_and_vec(vece, pn, pn, pg);
1215 tcg_gen_andc_vec(vece, pm, pm, pg);
1216 tcg_gen_or_vec(vece, pd, pn, pm);
1217}
1218
1219static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220{
1221 static const GVecGen4 op = {
1222 .fni8 = gen_sel_pg_i64,
1223 .fniv = gen_sel_pg_vec,
1224 .fno = gen_helper_sve_sel_pppp,
1225 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226 };
1227 if (a->s) {
1228 return false;
1229 } else {
1230 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231 }
1232}
1233
1234static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235{
1236 tcg_gen_or_i64(pd, pn, pm);
1237 tcg_gen_and_i64(pd, pd, pg);
1238}
1239
1240static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241 TCGv_vec pm, TCGv_vec pg)
1242{
1243 tcg_gen_or_vec(vece, pd, pn, pm);
1244 tcg_gen_and_vec(vece, pd, pd, pg);
1245}
1246
1247static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248{
1249 static const GVecGen4 op = {
1250 .fni8 = gen_orr_pg_i64,
1251 .fniv = gen_orr_pg_vec,
1252 .fno = gen_helper_sve_orr_pppp,
1253 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254 };
1255 if (a->s) {
1256 return do_pppp_flags(s, a, &op);
1257 } else if (a->pg == a->rn && a->rn == a->rm) {
1258 return do_mov_p(s, a->rd, a->rn);
1259 } else {
1260 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261 }
1262}
1263
/* Pd = (Pn | ~Pm) & Pg, 64-bit inline expansion. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1269
/* Vector expansion of predicated ORN: Pd = (Pn | ~Pm) & Pg. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1276
1277static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278{
1279 static const GVecGen4 op = {
1280 .fni8 = gen_orn_pg_i64,
1281 .fniv = gen_orn_pg_vec,
1282 .fno = gen_helper_sve_orn_pppp,
1283 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284 };
1285 if (a->s) {
1286 return do_pppp_flags(s, a, &op);
1287 } else {
1288 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289 }
1290}
1291
1292static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293{
1294 tcg_gen_or_i64(pd, pn, pm);
1295 tcg_gen_andc_i64(pd, pg, pd);
1296}
1297
1298static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299 TCGv_vec pm, TCGv_vec pg)
1300{
1301 tcg_gen_or_vec(vece, pd, pn, pm);
1302 tcg_gen_andc_vec(vece, pd, pg, pd);
1303}
1304
1305static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306{
1307 static const GVecGen4 op = {
1308 .fni8 = gen_nor_pg_i64,
1309 .fniv = gen_nor_pg_vec,
1310 .fno = gen_helper_sve_nor_pppp,
1311 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312 };
1313 if (a->s) {
1314 return do_pppp_flags(s, a, &op);
1315 } else {
1316 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317 }
1318}
1319
/* Pd = Pg & ~(Pn & Pm), 64-bit inline expansion. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1325
/* Vector expansion of predicated NAND: Pd = Pg & ~(Pn & Pm). */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1332
1333static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334{
1335 static const GVecGen4 op = {
1336 .fni8 = gen_nand_pg_i64,
1337 .fniv = gen_nand_pg_vec,
1338 .fno = gen_helper_sve_nand_pppp,
1339 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340 };
1341 if (a->s) {
1342 return do_pppp_flags(s, a, &op);
1343 } else {
1344 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345 }
1346}
1347
9e18d7a6
RH
1348/*
1349 *** SVE Predicate Misc Group
1350 */
1351
1352static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353{
1354 if (sve_access_check(s)) {
1355 int nofs = pred_full_reg_offset(s, a->rn);
1356 int gofs = pred_full_reg_offset(s, a->pg);
1357 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359 if (words == 1) {
1360 TCGv_i64 pn = tcg_temp_new_i64();
1361 TCGv_i64 pg = tcg_temp_new_i64();
1362
1363 tcg_gen_ld_i64(pn, cpu_env, nofs);
1364 tcg_gen_ld_i64(pg, cpu_env, gofs);
1365 do_predtest1(pn, pg);
1366
1367 tcg_temp_free_i64(pn);
1368 tcg_temp_free_i64(pg);
1369 } else {
1370 do_predtest(s, nofs, gofs, words);
1371 }
1372 }
1373 return true;
1374}
1375
028e2a7b
RH
1376/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;

    /* Patterns that are not simple "count if it fits" bounds. */
    switch (pattern) {
    case 0x0:  /* POW2 */
        return pow2floor(elements);
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:
        break;
    }

    unsigned bound;
    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1..VL8: the pattern value is the element count. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16..VL256: successive powers of two from 16. */
        bound = 16u << (pattern - 0x9);
    } else {
        /* Unallocated #uimm5 patterns select zero elements. */
        return 0;
    }
    /* A fixed count applies only if the vector is long enough. */
    return elements >= bound ? bound : 0;
}
1413
1414/* This handles all of the predicate initialization instructions,
1415 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1416 * so that decode_pred_count returns 0. For SETFFR, we will have
1417 * set RD == 16 == FFR.
1418 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        /* No elements selected: clear the whole predicate. */
        lastword = word = 0;
        setsz = fullsz;
    } else {
        /* 'word' is the repeating per-element bit pattern; 'lastword'
         * is the same pattern truncated to the final partial word. */
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* One predicate word covers everything. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* The set region is a whole number of words: try to use a
         * vector-wide duplicate, padding one zero word if needed. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    /* General case: store word-by-word. */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        /* Zero the remainder of the predicate register. */
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* NZCV reflects whether any element was set: N=1,C=0 if so,
         * else N=0,C=1; V=0; Z mirrors N per the flag representation here. */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1499
/* PTRUE / PTRUES: initialize a predicate from a count pattern. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
1504
/* SETFFR: set every element of the FFR. */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
1510
/* PFALSE: clear every element of Pd. */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1516
/* RDFFR (predicated): Pd = FFR & Pg, optionally setting flags. */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}
1528
/* RDFFR (unpredicated): copy the FFR into Pd. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
1533
/* WRFFR: copy Pn into the FFR. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1538
/*
 * Common expansion for PFIRST/PNEXT: call the out-of-line helper with
 * Pd (read/write) and Pg, then update NZCV from the helper's result.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    /* Predicate sizes are too small for simd_desc: pack the byte count
     * and element size manually. */
    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* 't' carries desc in and receives the flags result out. */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1567
/* PFIRST: set the first active element of Pd under Pg. */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
1572
/* PNEXT: advance Pd to the next active element under Pg. */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1577
24e82e68
RH
1578/*
1579 *** SVE Element Count Group
1580 */
1581
1582/* Perform an inline saturating addition of a 32-bit value within
1583 * a 64-bit register. The second operand is known to be positive,
1584 * which halves the comparisions we must perform to bound the result.
1585 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value: clamp from below. */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value: clamp from above. */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    /* reg = (reg cond bound) ? bound : reg -- i.e. saturate. */
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1611
1612/* Similarly with 64-bit values. */
/* Saturating add/subtract of a known-positive 64-bit value in place. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 on borrow (reg < val). */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: saturate to all-ones on carry (sum < reg). */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t0);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1658
1659/* Similarly with a vector and a scalar operand. */
/* Saturating add/subtract of scalar 'val' (known positive) to each
 * element of Zn, writing Zd, via the out-of-line sq/uq helpers. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* For the narrower element sizes a subtract is expressed as the
     * addition of a negated operand; only d/u select the helper. */
    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* 64-bit elements: negating the operand could itself overflow,
         * so the unsigned subtract has a dedicated helper. */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1743
1744static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1745{
1746 if (sve_access_check(s)) {
1747 unsigned fullsz = vec_full_reg_size(s);
1748 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1749 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1750 }
1751 return true;
1752}
1753
1754static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1755{
1756 if (sve_access_check(s)) {
1757 unsigned fullsz = vec_full_reg_size(s);
1758 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1759 int inc = numelem * a->imm * (a->d ? -1 : 1);
1760 TCGv_i64 reg = cpu_reg(s, a->rd);
1761
1762 tcg_gen_addi_i64(reg, reg, inc);
1763 }
1764 return true;
1765}
1766
1767static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1768 uint32_t insn)
1769{
1770 if (!sve_access_check(s)) {
1771 return true;
1772 }
1773
1774 unsigned fullsz = vec_full_reg_size(s);
1775 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1776 int inc = numelem * a->imm;
1777 TCGv_i64 reg = cpu_reg(s, a->rd);
1778
1779 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1780 if (inc == 0) {
1781 if (a->u) {
1782 tcg_gen_ext32u_i64(reg, reg);
1783 } else {
1784 tcg_gen_ext32s_i64(reg, reg);
1785 }
1786 } else {
1787 TCGv_i64 t = tcg_const_i64(inc);
1788 do_sat_addsub_32(reg, t, a->u, a->d);
1789 tcg_temp_free_i64(t);
1790 }
1791 return true;
1792}
1793
1794static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1795 uint32_t insn)
1796{
1797 if (!sve_access_check(s)) {
1798 return true;
1799 }
1800
1801 unsigned fullsz = vec_full_reg_size(s);
1802 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1803 int inc = numelem * a->imm;
1804 TCGv_i64 reg = cpu_reg(s, a->rd);
1805
1806 if (inc != 0) {
1807 TCGv_i64 t = tcg_const_i64(inc);
1808 do_sat_addsub_64(reg, t, a->u, a->d);
1809 tcg_temp_free_i64(t);
1810 }
1811 return true;
1812}
1813
/* INCH/DECH etc. (vector form): Zd = Zn +/- count * imm per element. */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    /* There is no byte-element form of this instruction. */
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* NOTE(review): this path reaches do_mov_z without a preceding
         * sve_access_check here -- confirm do_mov_z performs the access
         * check itself (its body is outside this view). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1837
/* SQINC/UQINC/SQDEC/UQDEC (vector form), saturating per element. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    /* There is no byte-element form of this instruction. */
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* NOTE(review): this path reaches do_mov_z without a preceding
         * sve_access_check here -- confirm do_mov_z performs the access
         * check itself (its body is outside this view). */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1860
e1fa1164
RH
1861/*
1862 *** SVE Bitwise Immediate Group
1863 */
1864
1865static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1866{
1867 uint64_t imm;
1868 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1869 extract32(a->dbm, 0, 6),
1870 extract32(a->dbm, 6, 6))) {
1871 return false;
1872 }
1873 if (sve_access_check(s)) {
1874 unsigned vsz = vec_full_reg_size(s);
1875 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1876 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1877 }
1878 return true;
1879}
1880
/* AND (vector, immediate bitmask). */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}
1885
/* ORR (vector, immediate bitmask). */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}
1890
/* EOR (vector, immediate bitmask). */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
1895
1896static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1897{
1898 uint64_t imm;
1899 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1900 extract32(a->dbm, 0, 6),
1901 extract32(a->dbm, 6, 6))) {
1902 return false;
1903 }
1904 if (sve_access_check(s)) {
1905 do_dupi_z(s, a->rd, imm);
1906 }
1907 return true;
1908}
1909
f25a2361
RH
1910/*
1911 *** SVE Integer Wide Immediate - Predicated Group
1912 */
1913
1914/* Implement all merging copies. This is used for CPY (immediate),
1915 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1916 */
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar): write 'val' to the active
 * elements of Zd, preserving inactive elements from Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* Per-element-size out-of-line helpers, indexed by esz. */
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
1942
1943static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1944{
1945 if (a->esz == 0) {
1946 return false;
1947 }
1948 if (sve_access_check(s)) {
1949 /* Decode the VFP immediate. */
1950 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1951 TCGv_i64 t_imm = tcg_const_i64(imm);
1952 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1953 tcg_temp_free_i64(t_imm);
1954 }
1955 return true;
1956}
1957
1958static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1959{
1960 if (a->esz == 0 && extract32(insn, 13, 1)) {
1961 return false;
1962 }
1963 if (sve_access_check(s)) {
1964 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1965 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1966 tcg_temp_free_i64(t_imm);
1967 }
1968 return true;
1969}
1970
1971static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1972{
1973 static gen_helper_gvec_2i * const fns[4] = {
1974 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1975 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1976 };
1977
1978 if (a->esz == 0 && extract32(insn, 13, 1)) {
1979 return false;
1980 }
1981 if (sve_access_check(s)) {
1982 unsigned vsz = vec_full_reg_size(s);
1983 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1984 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1985 pred_full_reg_offset(s, a->pg),
1986 t_imm, vsz, vsz, 0, fns[a->esz]);
1987 tcg_temp_free_i64(t_imm);
1988 }
1989 return true;
1990}
1991
b94f8f60
RH
1992/*
1993 *** SVE Permute Extract Group
1994 */
1995
/* EXT: Zd = concatenation of Zn[imm..] and Zm[0..imm), byte granularity. */
static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An out-of-range index selects the whole of Zn. */
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn into the low part of Zd ... */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            /* ... then the head of Zm into the high part. */
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper, passing n_ofs as data. */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2025
30562ab7
RH
2026/*
2027 *** SVE Permute - Unpredicated Group
2028 */
2029
2030static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2031{
2032 if (sve_access_check(s)) {
2033 unsigned vsz = vec_full_reg_size(s);
2034 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2035 vsz, vsz, cpu_reg_sp(s, a->rn));
2036 }
2037 return true;
2038}
2039
/* DUP (indexed): broadcast element Zn[index] to every element of Zd. */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    /* imm == 0 in the low 5 bits is an unallocated encoding. */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The element size is encoded by the lowest set bit of imm;
         * the index occupies the bits above it. */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /* Index beyond the vector length: the result is zero. */
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2062
/* INSR common expansion: shift Zn up one element and insert 'val'
 * at element 0, via the per-element-size out-of-line helpers. */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}
2084
2085static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2086{
2087 if (sve_access_check(s)) {
2088 TCGv_i64 t = tcg_temp_new_i64();
2089 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2090 do_insr_i64(s, a, t);
2091 tcg_temp_free_i64(t);
2092 }
2093 return true;
2094}
2095
2096static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097{
2098 if (sve_access_check(s)) {
2099 do_insr_i64(s, a, cpu_reg(s, a->rm));
2100 }
2101 return true;
2102}
2103
2104static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2105{
2106 static gen_helper_gvec_2 * const fns[4] = {
2107 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2108 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2109 };
2110
2111 if (sve_access_check(s)) {
2112 unsigned vsz = vec_full_reg_size(s);
2113 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2114 vec_full_reg_offset(s, a->rn),
2115 vsz, vsz, 0, fns[a->esz]);
2116 }
2117 return true;
2118}
2119
2120static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2121{
2122 static gen_helper_gvec_3 * const fns[4] = {
2123 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2124 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2125 };
2126
2127 if (sve_access_check(s)) {
2128 unsigned vsz = vec_full_reg_size(s);
2129 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2130 vec_full_reg_offset(s, a->rn),
2131 vec_full_reg_offset(s, a->rm),
2132 vsz, vsz, 0, fns[a->esz]);
2133 }
2134 return true;
2135}
2136
2137static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2138{
2139 static gen_helper_gvec_2 * const fns[4][2] = {
2140 { NULL, NULL },
2141 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2142 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2143 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2144 };
2145
2146 if (a->esz == 0) {
2147 return false;
2148 }
2149 if (sve_access_check(s)) {
2150 unsigned vsz = vec_full_reg_size(s);
2151 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2152 vec_full_reg_offset(s, a->rn)
2153 + (a->h ? vsz / 2 : 0),
2154 vsz, vsz, 0, fns[a->esz][a->u]);
2155 }
2156 return true;
2157}
2158
d731d8cb
RH
2159/*
2160 *** SVE Permute - Predicates Group
2161 */
2162
/* Common expansion for three-operand predicate permutes (ZIP/UZP/TRN),
 * selecting the low/even (high_odd == 0) or high/odd (== 1) variant. */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    /* Hand-packed descriptor: size-2 in the low bits, then esz and
     * the high_odd selector in the data field. */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2199
/* Common expansion for two-operand predicate permutes (REV/PUNPK),
 * selecting the low (high_odd == 0) or high (== 1) variant. */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    /* Hand-packed descriptor, as in do_perm_pred3. */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2233
/* ZIP1 (predicates): interleave elements from the low halves of Pn/Pm. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}
2238
/* ZIP2 (predicates): interleave elements from the high halves of Pn/Pm. */
static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}
2243
/* UZP1 (predicates): concatenate even-numbered elements of Pn/Pm. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}
2248
2249static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2250{
2251 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2252}
2253
2254static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2255{
2256 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2257}
2258
2259static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2260{
2261 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2262}
2263
2264static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2265{
2266 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2267}
2268
2269static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2270{
2271 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2272}
2273
2274static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2275{
2276 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2277}
2278
234b48e9
RH
2279/*
2280 *** SVE Permute - Interleaving Group
2281 */
2282
2283static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2284{
2285 static gen_helper_gvec_3 * const fns[4] = {
2286 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2287 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2288 };
2289
2290 if (sve_access_check(s)) {
2291 unsigned vsz = vec_full_reg_size(s);
2292 unsigned high_ofs = high ? vsz / 2 : 0;
2293 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2294 vec_full_reg_offset(s, a->rn) + high_ofs,
2295 vec_full_reg_offset(s, a->rm) + high_ofs,
2296 vsz, vsz, 0, fns[a->esz]);
2297 }
2298 return true;
2299}
2300
2301static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2302 gen_helper_gvec_3 *fn)
2303{
2304 if (sve_access_check(s)) {
2305 unsigned vsz = vec_full_reg_size(s);
2306 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2307 vec_full_reg_offset(s, a->rn),
2308 vec_full_reg_offset(s, a->rm),
2309 vsz, vsz, data, fn);
2310 }
2311 return true;
2312}
2313
2314static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2315{
2316 return do_zip(s, a, false);
2317}
2318
2319static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2320{
2321 return do_zip(s, a, true);
2322}
2323
2324static gen_helper_gvec_3 * const uzp_fns[4] = {
2325 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2326 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2327};
2328
2329static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2330{
2331 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2332}
2333
2334static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2335{
2336 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2337}
2338
2339static gen_helper_gvec_3 * const trn_fns[4] = {
2340 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2341 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2342};
2343
2344static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2345{
2346 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2347}
2348
2349static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2350{
2351 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2352}
2353
3ca879ae
RH
2354/*
2355 *** SVE Permute Vector - Predicated Group
2356 */
2357
2358static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2359{
2360 static gen_helper_gvec_3 * const fns[4] = {
2361 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2362 };
2363 return do_zpz_ool(s, a, fns[a->esz]);
2364}
2365
ef23cb72
RH
2366/* Call the helper that computes the ARM LastActiveElement pseudocode
2367 * function, scaled by the element size. This includes the not found
2368 * indication; e.g. not found for esz=3 is -8.
2369 */
2370static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2371{
2372 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2373 * round up, as we do elsewhere, because we need the exact size.
2374 */
2375 TCGv_ptr t_p = tcg_temp_new_ptr();
2376 TCGv_i32 t_desc;
2377 unsigned vsz = pred_full_reg_size(s);
2378 unsigned desc;
2379
2380 desc = vsz - 2;
2381 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2382
2383 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2384 t_desc = tcg_const_i32(desc);
2385
2386 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2387
2388 tcg_temp_free_i32(t_desc);
2389 tcg_temp_free_ptr(t_p);
2390}
2391
2392/* Increment LAST to the offset of the next element in the vector,
2393 * wrapping around to 0.
2394 */
2395static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2396{
2397 unsigned vsz = vec_full_reg_size(s);
2398
2399 tcg_gen_addi_i32(last, last, 1 << esz);
2400 if (is_power_of_2(vsz)) {
2401 tcg_gen_andi_i32(last, last, vsz - 1);
2402 } else {
2403 TCGv_i32 max = tcg_const_i32(vsz);
2404 TCGv_i32 zero = tcg_const_i32(0);
2405 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2406 tcg_temp_free_i32(max);
2407 tcg_temp_free_i32(zero);
2408 }
2409}
2410
2411/* If LAST < 0, set LAST to the offset of the last element in the vector. */
2412static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2413{
2414 unsigned vsz = vec_full_reg_size(s);
2415
2416 if (is_power_of_2(vsz)) {
2417 tcg_gen_andi_i32(last, last, vsz - 1);
2418 } else {
2419 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2420 TCGv_i32 zero = tcg_const_i32(0);
2421 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2422 tcg_temp_free_i32(max);
2423 tcg_temp_free_i32(zero);
2424 }
2425}
2426
2427/* Load an unsigned element of ESZ from BASE+OFS. */
2428static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2429{
2430 TCGv_i64 r = tcg_temp_new_i64();
2431
2432 switch (esz) {
2433 case 0:
2434 tcg_gen_ld8u_i64(r, base, ofs);
2435 break;
2436 case 1:
2437 tcg_gen_ld16u_i64(r, base, ofs);
2438 break;
2439 case 2:
2440 tcg_gen_ld32u_i64(r, base, ofs);
2441 break;
2442 case 3:
2443 tcg_gen_ld_i64(r, base, ofs);
2444 break;
2445 default:
2446 g_assert_not_reached();
2447 }
2448 return r;
2449}
2450
2451/* Load an unsigned element of ESZ from RM[LAST]. */
2452static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2453 int rm, int esz)
2454{
2455 TCGv_ptr p = tcg_temp_new_ptr();
2456 TCGv_i64 r;
2457
2458 /* Convert offset into vector into offset into ENV.
2459 * The final adjustment for the vector register base
2460 * is added via constant offset to the load.
2461 */
2462#ifdef HOST_WORDS_BIGENDIAN
2463 /* Adjust for element ordering. See vec_reg_offset. */
2464 if (esz < 3) {
2465 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2466 }
2467#endif
2468 tcg_gen_ext_i32_ptr(p, last);
2469 tcg_gen_add_ptr(p, p, cpu_env);
2470
2471 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2472 tcg_temp_free_ptr(p);
2473
2474 return r;
2475}
2476
2477/* Compute CLAST for a Zreg. */
2478static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2479{
2480 TCGv_i32 last;
2481 TCGLabel *over;
2482 TCGv_i64 ele;
2483 unsigned vsz, esz = a->esz;
2484
2485 if (!sve_access_check(s)) {
2486 return true;
2487 }
2488
2489 last = tcg_temp_local_new_i32();
2490 over = gen_new_label();
2491
2492 find_last_active(s, last, esz, a->pg);
2493
2494 /* There is of course no movcond for a 2048-bit vector,
2495 * so we must branch over the actual store.
2496 */
2497 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2498
2499 if (!before) {
2500 incr_last_active(s, last, esz);
2501 }
2502
2503 ele = load_last_active(s, last, a->rm, esz);
2504 tcg_temp_free_i32(last);
2505
2506 vsz = vec_full_reg_size(s);
2507 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2508 tcg_temp_free_i64(ele);
2509
2510 /* If this insn used MOVPRFX, we may need a second move. */
2511 if (a->rd != a->rn) {
2512 TCGLabel *done = gen_new_label();
2513 tcg_gen_br(done);
2514
2515 gen_set_label(over);
2516 do_mov_z(s, a->rd, a->rn);
2517
2518 gen_set_label(done);
2519 } else {
2520 gen_set_label(over);
2521 }
2522 return true;
2523}
2524
2525static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2526{
2527 return do_clast_vector(s, a, false);
2528}
2529
2530static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2531{
2532 return do_clast_vector(s, a, true);
2533}
2534
2535/* Compute CLAST for a scalar. */
2536static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2537 bool before, TCGv_i64 reg_val)
2538{
2539 TCGv_i32 last = tcg_temp_new_i32();
2540 TCGv_i64 ele, cmp, zero;
2541
2542 find_last_active(s, last, esz, pg);
2543
2544 /* Extend the original value of last prior to incrementing. */
2545 cmp = tcg_temp_new_i64();
2546 tcg_gen_ext_i32_i64(cmp, last);
2547
2548 if (!before) {
2549 incr_last_active(s, last, esz);
2550 }
2551
2552 /* The conceit here is that while last < 0 indicates not found, after
2553 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2554 * from which we can load garbage. We then discard the garbage with
2555 * a conditional move.
2556 */
2557 ele = load_last_active(s, last, rm, esz);
2558 tcg_temp_free_i32(last);
2559
2560 zero = tcg_const_i64(0);
2561 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2562
2563 tcg_temp_free_i64(zero);
2564 tcg_temp_free_i64(cmp);
2565 tcg_temp_free_i64(ele);
2566}
2567
2568/* Compute CLAST for a Vreg. */
2569static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2570{
2571 if (sve_access_check(s)) {
2572 int esz = a->esz;
2573 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2574 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2575
2576 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2577 write_fp_dreg(s, a->rd, reg);
2578 tcg_temp_free_i64(reg);
2579 }
2580 return true;
2581}
2582
2583static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584{
2585 return do_clast_fp(s, a, false);
2586}
2587
2588static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589{
2590 return do_clast_fp(s, a, true);
2591}
2592
2593/* Compute CLAST for a Xreg. */
2594static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2595{
2596 TCGv_i64 reg;
2597
2598 if (!sve_access_check(s)) {
2599 return true;
2600 }
2601
2602 reg = cpu_reg(s, a->rd);
2603 switch (a->esz) {
2604 case 0:
2605 tcg_gen_ext8u_i64(reg, reg);
2606 break;
2607 case 1:
2608 tcg_gen_ext16u_i64(reg, reg);
2609 break;
2610 case 2:
2611 tcg_gen_ext32u_i64(reg, reg);
2612 break;
2613 case 3:
2614 break;
2615 default:
2616 g_assert_not_reached();
2617 }
2618
2619 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2620 return true;
2621}
2622
2623static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624{
2625 return do_clast_general(s, a, false);
2626}
2627
2628static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629{
2630 return do_clast_general(s, a, true);
2631}
2632
2633/* Compute LAST for a scalar. */
2634static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2635 int pg, int rm, bool before)
2636{
2637 TCGv_i32 last = tcg_temp_new_i32();
2638 TCGv_i64 ret;
2639
2640 find_last_active(s, last, esz, pg);
2641 if (before) {
2642 wrap_last_active(s, last, esz);
2643 } else {
2644 incr_last_active(s, last, esz);
2645 }
2646
2647 ret = load_last_active(s, last, rm, esz);
2648 tcg_temp_free_i32(last);
2649 return ret;
2650}
2651
2652/* Compute LAST for a Vreg. */
2653static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2654{
2655 if (sve_access_check(s)) {
2656 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2657 write_fp_dreg(s, a->rd, val);
2658 tcg_temp_free_i64(val);
2659 }
2660 return true;
2661}
2662
2663static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664{
2665 return do_last_fp(s, a, false);
2666}
2667
2668static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669{
2670 return do_last_fp(s, a, true);
2671}
2672
2673/* Compute LAST for a Xreg. */
2674static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2675{
2676 if (sve_access_check(s)) {
2677 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2678 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2679 tcg_temp_free_i64(val);
2680 }
2681 return true;
2682}
2683
2684static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685{
2686 return do_last_general(s, a, false);
2687}
2688
2689static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690{
2691 return do_last_general(s, a, true);
2692}
2693
792a5578
RH
2694static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2695{
2696 if (sve_access_check(s)) {
2697 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2698 }
2699 return true;
2700}
2701
2702static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2703{
2704 if (sve_access_check(s)) {
2705 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2706 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2707 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2708 tcg_temp_free_i64(t);
2709 }
2710 return true;
2711}
2712
dae8fb90
RH
2713static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2714{
2715 static gen_helper_gvec_3 * const fns[4] = {
2716 NULL,
2717 gen_helper_sve_revb_h,
2718 gen_helper_sve_revb_s,
2719 gen_helper_sve_revb_d,
2720 };
2721 return do_zpz_ool(s, a, fns[a->esz]);
2722}
2723
2724static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2725{
2726 static gen_helper_gvec_3 * const fns[4] = {
2727 NULL,
2728 NULL,
2729 gen_helper_sve_revh_s,
2730 gen_helper_sve_revh_d,
2731 };
2732 return do_zpz_ool(s, a, fns[a->esz]);
2733}
2734
2735static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2736{
2737 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2738}
2739
2740static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2741{
2742 static gen_helper_gvec_3 * const fns[4] = {
2743 gen_helper_sve_rbit_b,
2744 gen_helper_sve_rbit_h,
2745 gen_helper_sve_rbit_s,
2746 gen_helper_sve_rbit_d,
2747 };
2748 return do_zpz_ool(s, a, fns[a->esz]);
2749}
2750
b48ff240
RH
2751static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2752{
2753 if (sve_access_check(s)) {
2754 unsigned vsz = vec_full_reg_size(s);
2755 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2756 vec_full_reg_offset(s, a->rn),
2757 vec_full_reg_offset(s, a->rm),
2758 pred_full_reg_offset(s, a->pg),
2759 vsz, vsz, a->esz, gen_helper_sve_splice);
2760 }
2761 return true;
2762}
2763
757f9cff
RH
2764/*
2765 *** SVE Integer Compare - Vectors Group
2766 */
2767
2768static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2769 gen_helper_gvec_flags_4 *gen_fn)
2770{
2771 TCGv_ptr pd, zn, zm, pg;
2772 unsigned vsz;
2773 TCGv_i32 t;
2774
2775 if (gen_fn == NULL) {
2776 return false;
2777 }
2778 if (!sve_access_check(s)) {
2779 return true;
2780 }
2781
2782 vsz = vec_full_reg_size(s);
2783 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2784 pd = tcg_temp_new_ptr();
2785 zn = tcg_temp_new_ptr();
2786 zm = tcg_temp_new_ptr();
2787 pg = tcg_temp_new_ptr();
2788
2789 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2790 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2791 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2792 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2793
2794 gen_fn(t, pd, zn, zm, pg, t);
2795
2796 tcg_temp_free_ptr(pd);
2797 tcg_temp_free_ptr(zn);
2798 tcg_temp_free_ptr(zm);
2799 tcg_temp_free_ptr(pg);
2800
2801 do_pred_flags(t);
2802
2803 tcg_temp_free_i32(t);
2804 return true;
2805}
2806
2807#define DO_PPZZ(NAME, name) \
2808static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2809 uint32_t insn) \
2810{ \
2811 static gen_helper_gvec_flags_4 * const fns[4] = { \
2812 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2813 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2814 }; \
2815 return do_ppzz_flags(s, a, fns[a->esz]); \
2816}
2817
2818DO_PPZZ(CMPEQ, cmpeq)
2819DO_PPZZ(CMPNE, cmpne)
2820DO_PPZZ(CMPGT, cmpgt)
2821DO_PPZZ(CMPGE, cmpge)
2822DO_PPZZ(CMPHI, cmphi)
2823DO_PPZZ(CMPHS, cmphs)
2824
2825#undef DO_PPZZ
2826
2827#define DO_PPZW(NAME, name) \
2828static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2829 uint32_t insn) \
2830{ \
2831 static gen_helper_gvec_flags_4 * const fns[4] = { \
2832 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2833 gen_helper_sve_##name##_ppzw_s, NULL \
2834 }; \
2835 return do_ppzz_flags(s, a, fns[a->esz]); \
2836}
2837
2838DO_PPZW(CMPEQ, cmpeq)
2839DO_PPZW(CMPNE, cmpne)
2840DO_PPZW(CMPGT, cmpgt)
2841DO_PPZW(CMPGE, cmpge)
2842DO_PPZW(CMPHI, cmphi)
2843DO_PPZW(CMPHS, cmphs)
2844DO_PPZW(CMPLT, cmplt)
2845DO_PPZW(CMPLE, cmple)
2846DO_PPZW(CMPLO, cmplo)
2847DO_PPZW(CMPLS, cmpls)
2848
2849#undef DO_PPZW
2850
38cadeba
RH
2851/*
2852 *** SVE Integer Compare - Immediate Groups
2853 */
2854
2855static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2856 gen_helper_gvec_flags_3 *gen_fn)
2857{
2858 TCGv_ptr pd, zn, pg;
2859 unsigned vsz;
2860 TCGv_i32 t;
2861
2862 if (gen_fn == NULL) {
2863 return false;
2864 }
2865 if (!sve_access_check(s)) {
2866 return true;
2867 }
2868
2869 vsz = vec_full_reg_size(s);
2870 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2871 pd = tcg_temp_new_ptr();
2872 zn = tcg_temp_new_ptr();
2873 pg = tcg_temp_new_ptr();
2874
2875 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2876 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2877 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2878
2879 gen_fn(t, pd, zn, pg, t);
2880
2881 tcg_temp_free_ptr(pd);
2882 tcg_temp_free_ptr(zn);
2883 tcg_temp_free_ptr(pg);
2884
2885 do_pred_flags(t);
2886
2887 tcg_temp_free_i32(t);
2888 return true;
2889}
2890
2891#define DO_PPZI(NAME, name) \
2892static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2893 uint32_t insn) \
2894{ \
2895 static gen_helper_gvec_flags_3 * const fns[4] = { \
2896 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2897 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2898 }; \
2899 return do_ppzi_flags(s, a, fns[a->esz]); \
2900}
2901
2902DO_PPZI(CMPEQ, cmpeq)
2903DO_PPZI(CMPNE, cmpne)
2904DO_PPZI(CMPGT, cmpgt)
2905DO_PPZI(CMPGE, cmpge)
2906DO_PPZI(CMPHI, cmphi)
2907DO_PPZI(CMPHS, cmphs)
2908DO_PPZI(CMPLT, cmplt)
2909DO_PPZI(CMPLE, cmple)
2910DO_PPZI(CMPLO, cmplo)
2911DO_PPZI(CMPLS, cmpls)
2912
2913#undef DO_PPZI
2914
35da316f
RH
2915/*
2916 *** SVE Partition Break Group
2917 */
2918
2919static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2920 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2921{
2922 if (!sve_access_check(s)) {
2923 return true;
2924 }
2925
2926 unsigned vsz = pred_full_reg_size(s);
2927
2928 /* Predicate sizes may be smaller and cannot use simd_desc. */
2929 TCGv_ptr d = tcg_temp_new_ptr();
2930 TCGv_ptr n = tcg_temp_new_ptr();
2931 TCGv_ptr m = tcg_temp_new_ptr();
2932 TCGv_ptr g = tcg_temp_new_ptr();
2933 TCGv_i32 t = tcg_const_i32(vsz - 2);
2934
2935 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2936 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2937 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2938 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2939
2940 if (a->s) {
2941 fn_s(t, d, n, m, g, t);
2942 do_pred_flags(t);
2943 } else {
2944 fn(d, n, m, g, t);
2945 }
2946 tcg_temp_free_ptr(d);
2947 tcg_temp_free_ptr(n);
2948 tcg_temp_free_ptr(m);
2949 tcg_temp_free_ptr(g);
2950 tcg_temp_free_i32(t);
2951 return true;
2952}
2953
2954static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2955 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2956{
2957 if (!sve_access_check(s)) {
2958 return true;
2959 }
2960
2961 unsigned vsz = pred_full_reg_size(s);
2962
2963 /* Predicate sizes may be smaller and cannot use simd_desc. */
2964 TCGv_ptr d = tcg_temp_new_ptr();
2965 TCGv_ptr n = tcg_temp_new_ptr();
2966 TCGv_ptr g = tcg_temp_new_ptr();
2967 TCGv_i32 t = tcg_const_i32(vsz - 2);
2968
2969 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2970 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2971 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2972
2973 if (a->s) {
2974 fn_s(t, d, n, g, t);
2975 do_pred_flags(t);
2976 } else {
2977 fn(d, n, g, t);
2978 }
2979 tcg_temp_free_ptr(d);
2980 tcg_temp_free_ptr(n);
2981 tcg_temp_free_ptr(g);
2982 tcg_temp_free_i32(t);
2983 return true;
2984}
2985
2986static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2987{
2988 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2989}
2990
2991static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2992{
2993 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2994}
2995
2996static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2997{
2998 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2999}
3000
3001static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3002{
3003 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3004}
3005
3006static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3007{
3008 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3009}
3010
3011static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3012{
3013 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3014}
3015
3016static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3017{
3018 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3019}
3020
9ee3a611
RH
3021/*
3022 *** SVE Predicate Count Group
3023 */
3024
3025static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3026{
3027 unsigned psz = pred_full_reg_size(s);
3028
3029 if (psz <= 8) {
3030 uint64_t psz_mask;
3031
3032 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3033 if (pn != pg) {
3034 TCGv_i64 g = tcg_temp_new_i64();
3035 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3036 tcg_gen_and_i64(val, val, g);
3037 tcg_temp_free_i64(g);
3038 }
3039
3040 /* Reduce the pred_esz_masks value simply to reduce the
3041 * size of the code generated here.
3042 */
3043 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3044 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3045
3046 tcg_gen_ctpop_i64(val, val);
3047 } else {
3048 TCGv_ptr t_pn = tcg_temp_new_ptr();
3049 TCGv_ptr t_pg = tcg_temp_new_ptr();
3050 unsigned desc;
3051 TCGv_i32 t_desc;
3052
3053 desc = psz - 2;
3054 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3055
3056 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3057 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3058 t_desc = tcg_const_i32(desc);
3059
3060 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3061 tcg_temp_free_ptr(t_pn);
3062 tcg_temp_free_ptr(t_pg);
3063 tcg_temp_free_i32(t_desc);
3064 }
3065}
3066
3067static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3068{
3069 if (sve_access_check(s)) {
3070 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3071 }
3072 return true;
3073}
3074
3075static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3076 uint32_t insn)
3077{
3078 if (sve_access_check(s)) {
3079 TCGv_i64 reg = cpu_reg(s, a->rd);
3080 TCGv_i64 val = tcg_temp_new_i64();
3081
3082 do_cntp(s, val, a->esz, a->pg, a->pg);
3083 if (a->d) {
3084 tcg_gen_sub_i64(reg, reg, val);
3085 } else {
3086 tcg_gen_add_i64(reg, reg, val);
3087 }
3088 tcg_temp_free_i64(val);
3089 }
3090 return true;
3091}
3092
3093static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3094 uint32_t insn)
3095{
3096 if (a->esz == 0) {
3097 return false;
3098 }
3099 if (sve_access_check(s)) {
3100 unsigned vsz = vec_full_reg_size(s);
3101 TCGv_i64 val = tcg_temp_new_i64();
3102 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3103
3104 do_cntp(s, val, a->esz, a->pg, a->pg);
3105 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3106 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3107 }
3108 return true;
3109}
3110
3111static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3112 uint32_t insn)
3113{
3114 if (sve_access_check(s)) {
3115 TCGv_i64 reg = cpu_reg(s, a->rd);
3116 TCGv_i64 val = tcg_temp_new_i64();
3117
3118 do_cntp(s, val, a->esz, a->pg, a->pg);
3119 do_sat_addsub_32(reg, val, a->u, a->d);
3120 }
3121 return true;
3122}
3123
3124static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3125 uint32_t insn)
3126{
3127 if (sve_access_check(s)) {
3128 TCGv_i64 reg = cpu_reg(s, a->rd);
3129 TCGv_i64 val = tcg_temp_new_i64();
3130
3131 do_cntp(s, val, a->esz, a->pg, a->pg);
3132 do_sat_addsub_64(reg, val, a->u, a->d);
3133 }
3134 return true;
3135}
3136
3137static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3138 uint32_t insn)
3139{
3140 if (a->esz == 0) {
3141 return false;
3142 }
3143 if (sve_access_check(s)) {
3144 TCGv_i64 val = tcg_temp_new_i64();
3145 do_cntp(s, val, a->esz, a->pg, a->pg);
3146 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3147 }
3148 return true;
3149}
3150
caf1cefc
RH
3151/*
3152 *** SVE Integer Compare Scalars Group
3153 */
3154
3155static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3156{
3157 if (!sve_access_check(s)) {
3158 return true;
3159 }
3160
3161 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3162 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3163 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3164 TCGv_i64 cmp = tcg_temp_new_i64();
3165
3166 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3167 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3168 tcg_temp_free_i64(cmp);
3169
3170 /* VF = !NF & !CF. */
3171 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3172 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3173
3174 /* Both NF and VF actually look at bit 31. */
3175 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3176 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3177 return true;
3178}
3179
3180static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3181{
3182 if (!sve_access_check(s)) {
3183 return true;
3184 }
3185
3186 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3187 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3188 TCGv_i64 t0 = tcg_temp_new_i64();
3189 TCGv_i64 t1 = tcg_temp_new_i64();
3190 TCGv_i32 t2, t3;
3191 TCGv_ptr ptr;
3192 unsigned desc, vsz = vec_full_reg_size(s);
3193 TCGCond cond;
3194
3195 if (!a->sf) {
3196 if (a->u) {
3197 tcg_gen_ext32u_i64(op0, op0);
3198 tcg_gen_ext32u_i64(op1, op1);
3199 } else {
3200 tcg_gen_ext32s_i64(op0, op0);
3201 tcg_gen_ext32s_i64(op1, op1);
3202 }
3203 }
3204
3205 /* For the helper, compress the different conditions into a computation
3206 * of how many iterations for which the condition is true.
3207 *
3208 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3209 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3210 * aren't that large, so any value >= predicate size is sufficient.
3211 */
3212 tcg_gen_sub_i64(t0, op1, op0);
3213
3214 /* t0 = MIN(op1 - op0, vsz). */
3215 tcg_gen_movi_i64(t1, vsz);
3216 tcg_gen_umin_i64(t0, t0, t1);
3217 if (a->eq) {
3218 /* Equality means one more iteration. */
3219 tcg_gen_addi_i64(t0, t0, 1);
3220 }
3221
3222 /* t0 = (condition true ? t0 : 0). */
3223 cond = (a->u
3224 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3225 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3226 tcg_gen_movi_i64(t1, 0);
3227 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3228
3229 t2 = tcg_temp_new_i32();
3230 tcg_gen_extrl_i64_i32(t2, t0);
3231 tcg_temp_free_i64(t0);
3232 tcg_temp_free_i64(t1);
3233
3234 desc = (vsz / 8) - 2;
3235 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3236 t3 = tcg_const_i32(desc);
3237
3238 ptr = tcg_temp_new_ptr();
3239 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3240
3241 gen_helper_sve_while(t2, ptr, t2, t3);
3242 do_pred_flags(t2);
3243
3244 tcg_temp_free_ptr(ptr);
3245 tcg_temp_free_i32(t2);
3246 tcg_temp_free_i32(t3);
3247 return true;
3248}
3249
ed491961
RH
3250/*
3251 *** SVE Integer Wide Immediate - Unpredicated Group
3252 */
3253
3254static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3255{
3256 if (a->esz == 0) {
3257 return false;
3258 }
3259 if (sve_access_check(s)) {
3260 unsigned vsz = vec_full_reg_size(s);
3261 int dofs = vec_full_reg_offset(s, a->rd);
3262 uint64_t imm;
3263
3264 /* Decode the VFP immediate. */
3265 imm = vfp_expand_imm(a->esz, a->imm);
3266 imm = dup_const(a->esz, imm);
3267
3268 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3269 }
3270 return true;
3271}
3272
3273static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3274{
3275 if (a->esz == 0 && extract32(insn, 13, 1)) {
3276 return false;
3277 }
3278 if (sve_access_check(s)) {
3279 unsigned vsz = vec_full_reg_size(s);
3280 int dofs = vec_full_reg_offset(s, a->rd);
3281
3282 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3283 }
3284 return true;
3285}
3286
6e6a157d
RH
3287static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3288{
3289 if (a->esz == 0 && extract32(insn, 13, 1)) {
3290 return false;
3291 }
3292 if (sve_access_check(s)) {
3293 unsigned vsz = vec_full_reg_size(s);
3294 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3295 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3296 }
3297 return true;
3298}
3299
3300static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3301{
3302 a->imm = -a->imm;
3303 return trans_ADD_zzi(s, a, insn);
3304}
3305
3306static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3307{
3308 static const GVecGen2s op[4] = {
3309 { .fni8 = tcg_gen_vec_sub8_i64,
3310 .fniv = tcg_gen_sub_vec,
3311 .fno = gen_helper_sve_subri_b,
3312 .opc = INDEX_op_sub_vec,
3313 .vece = MO_8,
3314 .scalar_first = true },
3315 { .fni8 = tcg_gen_vec_sub16_i64,
3316 .fniv = tcg_gen_sub_vec,
3317 .fno = gen_helper_sve_subri_h,
3318 .opc = INDEX_op_sub_vec,
3319 .vece = MO_16,
3320 .scalar_first = true },
3321 { .fni4 = tcg_gen_sub_i32,
3322 .fniv = tcg_gen_sub_vec,
3323 .fno = gen_helper_sve_subri_s,
3324 .opc = INDEX_op_sub_vec,
3325 .vece = MO_32,
3326 .scalar_first = true },
3327 { .fni8 = tcg_gen_sub_i64,
3328 .fniv = tcg_gen_sub_vec,
3329 .fno = gen_helper_sve_subri_d,
3330 .opc = INDEX_op_sub_vec,
3331 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332 .vece = MO_64,
3333 .scalar_first = true }
3334 };
3335
3336 if (a->esz == 0 && extract32(insn, 13, 1)) {
3337 return false;
3338 }
3339 if (sve_access_check(s)) {
3340 unsigned vsz = vec_full_reg_size(s);
3341 TCGv_i64 c = tcg_const_i64(a->imm);
3342 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343 vec_full_reg_offset(s, a->rn),
3344 vsz, vsz, c, &op[a->esz]);
3345 tcg_temp_free_i64(c);
3346 }
3347 return true;
3348}
3349
3350static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3351{
3352 if (sve_access_check(s)) {
3353 unsigned vsz = vec_full_reg_size(s);
3354 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356 }
3357 return true;
3358}
3359
3360static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3361 bool u, bool d)
3362{
3363 if (a->esz == 0 && extract32(insn, 13, 1)) {
3364 return false;
3365 }
3366 if (sve_access_check(s)) {
3367 TCGv_i64 val = tcg_const_i64(a->imm);
3368 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3369 tcg_temp_free_i64(val);
3370 }
3371 return true;
3372}
3373
/* SQADD (immediate): signed saturating addition.  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, false);
}

/* UQADD (immediate): unsigned saturating addition.  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, false);
}

/* SQSUB (immediate): signed saturating subtraction.  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, true);
}

/* UQSUB (immediate): unsigned saturating subtraction.  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, true);
}
3393
3394static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3395{
3396 if (sve_access_check(s)) {
3397 unsigned vsz = vec_full_reg_size(s);
3398 TCGv_i64 c = tcg_const_i64(a->imm);
3399
3400 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3401 vec_full_reg_offset(s, a->rn),
3402 c, vsz, vsz, 0, fn);
3403 tcg_temp_free_i64(c);
3404 }
3405 return true;
3406}
3407
/* Expand the integer min/max (immediate) insns via out-of-line helpers,
 * one per element size (fns[] is indexed directly by a->esz).
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
                               uint32_t insn)                           \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3425
d730ecaa
RH
3426static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3427{
3428 static gen_helper_gvec_3 * const fns[2][2] = {
3429 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3430 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3431 };
3432
3433 if (sve_access_check(s)) {
3434 unsigned vsz = vec_full_reg_size(s);
3435 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3436 vec_full_reg_offset(s, a->rn),
3437 vec_full_reg_offset(s, a->rm),
3438 vsz, vsz, 0, fns[a->u][a->sz]);
3439 }
3440 return true;
3441}
3442
16fcfdc7
RH
3443static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3444{
3445 static gen_helper_gvec_3 * const fns[2][2] = {
3446 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3447 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3448 };
3449
3450 if (sve_access_check(s)) {
3451 unsigned vsz = vec_full_reg_size(s);
3452 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3453 vec_full_reg_offset(s, a->rn),
3454 vec_full_reg_offset(s, a->rm),
3455 vsz, vsz, a->index, fns[a->u][a->sz]);
3456 }
3457 return true;
3458}
3459
3460
ca40a6e6
RH
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */
3464
3465static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3466{
3467 static gen_helper_gvec_4_ptr * const fns[3] = {
3468 gen_helper_gvec_fmla_idx_h,
3469 gen_helper_gvec_fmla_idx_s,
3470 gen_helper_gvec_fmla_idx_d,
3471 };
3472
3473 if (sve_access_check(s)) {
3474 unsigned vsz = vec_full_reg_size(s);
3475 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3476 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3477 vec_full_reg_offset(s, a->rn),
3478 vec_full_reg_offset(s, a->rm),
3479 vec_full_reg_offset(s, a->ra),
3480 status, vsz, vsz, (a->index << 1) | a->sub,
3481 fns[a->esz - 1]);
3482 tcg_temp_free_ptr(status);
3483 }
3484 return true;
3485}
3486
/*
 *** SVE Floating Point Multiply Indexed Group
 */
3490
3491static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3492{
3493 static gen_helper_gvec_3_ptr * const fns[3] = {
3494 gen_helper_gvec_fmul_idx_h,
3495 gen_helper_gvec_fmul_idx_s,
3496 gen_helper_gvec_fmul_idx_d,
3497 };
3498
3499 if (sve_access_check(s)) {
3500 unsigned vsz = vec_full_reg_size(s);
3501 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3503 vec_full_reg_offset(s, a->rn),
3504 vec_full_reg_offset(s, a->rm),
3505 status, vsz, vsz, a->index, fns[a->esz - 1]);
3506 tcg_temp_free_ptr(status);
3507 }
3508 return true;
3509}
3510
23fbe79f
RH
/*
 *** SVE Floating Point Fast Reduction Group
 */
3514
3515typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3516 TCGv_ptr, TCGv_i32);
3517
3518static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3519 gen_helper_fp_reduce *fn)
3520{
3521 unsigned vsz = vec_full_reg_size(s);
3522 unsigned p2vsz = pow2ceil(vsz);
3523 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3524 TCGv_ptr t_zn, t_pg, status;
3525 TCGv_i64 temp;
3526
3527 temp = tcg_temp_new_i64();
3528 t_zn = tcg_temp_new_ptr();
3529 t_pg = tcg_temp_new_ptr();
3530
3531 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3533 status = get_fpstatus_ptr(a->esz == MO_16);
3534
3535 fn(temp, t_zn, t_pg, status, t_desc);
3536 tcg_temp_free_ptr(t_zn);
3537 tcg_temp_free_ptr(t_pg);
3538 tcg_temp_free_ptr(status);
3539 tcg_temp_free_i32(t_desc);
3540
3541 write_fp_dreg(s, a->rd, temp);
3542 tcg_temp_free_i64(temp);
3543}
3544
/* Expand the predicated FP horizontal reductions (FADDV etc).
 * There is no byte-sized FP element, so esz == 0 is reserved and the
 * helper tables are indexed by esz - 1.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{ \
    static gen_helper_fp_reduce * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_reduce(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3567
3887c038
RH
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */
3571
3572static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3573{
3574 unsigned vsz = vec_full_reg_size(s);
3575 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3576
3577 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3578 vec_full_reg_offset(s, a->rn),
3579 status, vsz, vsz, 0, fn);
3580 tcg_temp_free_ptr(status);
3581}
3582
3583static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3584{
3585 static gen_helper_gvec_2_ptr * const fns[3] = {
3586 gen_helper_gvec_frecpe_h,
3587 gen_helper_gvec_frecpe_s,
3588 gen_helper_gvec_frecpe_d,
3589 };
3590 if (a->esz == 0) {
3591 return false;
3592 }
3593 if (sve_access_check(s)) {
3594 do_zz_fp(s, a, fns[a->esz - 1]);
3595 }
3596 return true;
3597}
3598
3599static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3600{
3601 static gen_helper_gvec_2_ptr * const fns[3] = {
3602 gen_helper_gvec_frsqrte_h,
3603 gen_helper_gvec_frsqrte_s,
3604 gen_helper_gvec_frsqrte_d,
3605 };
3606 if (a->esz == 0) {
3607 return false;
3608 }
3609 if (sve_access_check(s)) {
3610 do_zz_fp(s, a, fns[a->esz - 1]);
3611 }
3612 return true;
3613}
3614
4d2e2a03
RH
/*
 *** SVE Floating Point Compare with Zero Group
 */
3618
3619static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3620 gen_helper_gvec_3_ptr *fn)
3621{
3622 unsigned vsz = vec_full_reg_size(s);
3623 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3624
3625 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3626 vec_full_reg_offset(s, a->rn),
3627 pred_full_reg_offset(s, a->pg),
3628 status, vsz, vsz, 0, fn);
3629 tcg_temp_free_ptr(status);
3630}
3631
/* Expand the predicated FP compare-with-zero instructions.
 * esz == 0 (byte) is reserved; helper tables are indexed by esz - 1.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_3_ptr * const fns[3] = { \
        gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, \
        gen_helper_sve_##name##_d, \
    }; \
    if (a->esz == 0) { \
        return false; \
    } \
    if (sve_access_check(s)) { \
        do_ppz_fp(s, a, fns[a->esz - 1]); \
    } \
    return true; \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3657
67fcd9ad
RH
/*
 *** SVE floating-point trig multiply-add coefficient
 */
3661
3662static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3663{
3664 static gen_helper_gvec_3_ptr * const fns[3] = {
3665 gen_helper_sve_ftmad_h,
3666 gen_helper_sve_ftmad_s,
3667 gen_helper_sve_ftmad_d,
3668 };
3669
3670 if (a->esz == 0) {
3671 return false;
3672 }
3673 if (sve_access_check(s)) {
3674 unsigned vsz = vec_full_reg_size(s);
3675 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3676 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3677 vec_full_reg_offset(s, a->rn),
3678 vec_full_reg_offset(s, a->rm),
3679 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3680 tcg_temp_free_ptr(status);
3681 }
3682 return true;
3683}
3684
7f9ddf64
RH
/*
 *** SVE Floating Point Accumulating Reduction Group
 */
3688
/*
 * FADDA: ordered floating-point accumulating reduction.  The initial
 * accumulator is element 0 of Zn; the scalar result is written to Zd.
 * esz == 0 (byte) is a reserved encoding.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the starting accumulator value: element 0 of Zn at size esz.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* Helper tables are indexed by esz - 1 (no byte-sized FP).  */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    /* Write the scalar result to Zd.  */
    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3729
29b80469
RH
/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */
3733
3734static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3735 gen_helper_gvec_3_ptr *fn)
3736{
3737 if (fn == NULL) {
3738 return false;
3739 }
3740 if (sve_access_check(s)) {
3741 unsigned vsz = vec_full_reg_size(s);
3742 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3743 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3744 vec_full_reg_offset(s, a->rn),
3745 vec_full_reg_offset(s, a->rm),
3746 status, vsz, vsz, 0, fn);
3747 tcg_temp_free_ptr(status);
3748 }
3749 return true;
3750}
3751
3752
/* Expand the unpredicated FP three-operand arithmetic insns.
 * esz == 0 maps to a NULL helper, which do_zzz_fp rejects.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_3_ptr * const fns[4] = { \
        NULL, gen_helper_gvec_##name##_h, \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
    }; \
    return do_zzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3771
ec3b87c2
RH
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */
3775
3776static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3777 gen_helper_gvec_4_ptr *fn)
3778{
3779 if (fn == NULL) {
3780 return false;
3781 }
3782 if (sve_access_check(s)) {
3783 unsigned vsz = vec_full_reg_size(s);
3784 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3785 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3786 vec_full_reg_offset(s, a->rn),
3787 vec_full_reg_offset(s, a->rm),
3788 pred_full_reg_offset(s, a->pg),
3789 status, vsz, vsz, 0, fn);
3790 tcg_temp_free_ptr(status);
3791 }
3792 return true;
3793}
3794
/* Expand the predicated FP three-operand arithmetic insns.
 * esz == 0 maps to a NULL helper, which do_zpzz_fp rejects.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
{ \
    static gen_helper_gvec_4_ptr * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_zpzz_fp(s, a, fns[a->esz]); \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
8092c6a3 3818
cc48affe
RH
3819typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3820 TCGv_i64, TCGv_ptr, TCGv_i32);
3821
3822static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3823 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3824{
3825 unsigned vsz = vec_full_reg_size(s);
3826 TCGv_ptr t_zd, t_zn, t_pg, status;
3827 TCGv_i32 desc;
3828
3829 t_zd = tcg_temp_new_ptr();
3830 t_zn = tcg_temp_new_ptr();
3831 t_pg = tcg_temp_new_ptr();
3832 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3833 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3834 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3835
3836 status = get_fpstatus_ptr(is_fp16);
3837 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3838 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3839
3840 tcg_temp_free_i32(desc);
3841 tcg_temp_free_ptr(status);
3842 tcg_temp_free_ptr(t_pg);
3843 tcg_temp_free_ptr(t_zn);
3844 tcg_temp_free_ptr(t_zd);
3845}
3846
3847static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3848 gen_helper_sve_fp2scalar *fn)
3849{
3850 TCGv_i64 temp = tcg_const_i64(imm);
3851 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3852 tcg_temp_free_i64(temp);
3853}
3854
/* Expand the predicated FP arithmetic-with-immediate insns.  The one-bit
 * immediate selects between two fixed constants (e.g. 0.5 vs 1.0), given
 * per element size in val[].  esz == 0 is a reserved encoding.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

/* 2.0 is not among softfloat's predefined constants; provide it here.  */
#define float16_two make_float16(0x4000)
#define float32_two make_float32(0x40000000)
#define float64_two make_float64(0x4000000000000000ULL)

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3892
abfdefd5
RH
3893static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3894 gen_helper_gvec_4_ptr *fn)
3895{
3896 if (fn == NULL) {
3897 return false;
3898 }
3899 if (sve_access_check(s)) {
3900 unsigned vsz = vec_full_reg_size(s);
3901 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3902 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3903 vec_full_reg_offset(s, a->rn),
3904 vec_full_reg_offset(s, a->rm),
3905 pred_full_reg_offset(s, a->pg),
3906 status, vsz, vsz, 0, fn);
3907 tcg_temp_free_ptr(status);
3908 }
3909 return true;
3910}
3911
/* Expand the predicated FP vector-vector compares.
 * esz == 0 maps to a NULL helper, which do_fp_cmp rejects.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
                                uint32_t insn)                        \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3932
76a9d9cd
RH
3933static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3934{
3935 static gen_helper_gvec_4_ptr * const fns[3] = {
3936 gen_helper_sve_fcadd_h,
3937 gen_helper_sve_fcadd_s,
3938 gen_helper_sve_fcadd_d
3939 };
3940
3941 if (a->esz == 0) {
3942 return false;
3943 }
3944 if (sve_access_check(s)) {
3945 unsigned vsz = vec_full_reg_size(s);
3946 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3947 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3948 vec_full_reg_offset(s, a->rn),
3949 vec_full_reg_offset(s, a->rm),
3950 pred_full_reg_offset(s, a->pg),
3951 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3952 tcg_temp_free_ptr(status);
3953 }
3954 return true;
3955}
3956
6ceabaad
RH
/* Signature of the predicated FP multiply-add helpers: all register
 * numbers are packed into the descriptor (see below).
 */
typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);

/* Expand a predicated FP multiply-add; a NULL helper marks a reserved
 * element size.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor:
     * bits [4:0] = rd, [9:5] = rn, [14:10] = rm, [19:15] = ra.
     */
    desc = deposit32(a->rd, 5, 5, a->rn);
    desc = deposit32(desc, 10, 5, a->rm);
    desc = deposit32(desc, 15, 5, a->ra);
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
3988
/* Expand the predicated FP multiply-add variants via do_fmla.
 * esz == 0 maps to a NULL helper, which do_fmla rejects.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{ \
    static gen_helper_sve_fmla * const fns[4] = { \
        NULL, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
    }; \
    return do_fmla(s, a, fns[a->esz]); \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4005
05f48bab
RH
/* FCMLA (vectors, predicated): complex multiply-add with rotate.
 * esz == 0 (byte) is a reserved encoding.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s,
                              arg_FCMLA_zpzzz *a, uint32_t insn)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor:
         * bits [4:0] = rd, [9:5] = rn, [14:10] = rm, [19:15] = ra,
         * bits [21:20] = rot.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        /* NOTE(review): the 22-bit payload is sign-extended here,
         * presumably to fit the signed data field accepted by
         * simd_desc -- confirm against tcg-gvec-desc.h.
         */
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        /* Helper tables are indexed by esz - 1 (no byte-sized FP).  */
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
4042
18fc2405
RH
4043static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4044{
4045 static gen_helper_gvec_3_ptr * const fns[2] = {
4046 gen_helper_gvec_fcmlah_idx,
4047 gen_helper_gvec_fcmlas_idx,
4048 };
4049
4050 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4051 tcg_debug_assert(a->rd == a->ra);
4052 if (sve_access_check(s)) {
4053 unsigned vsz = vec_full_reg_size(s);
4054 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4055 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4056 vec_full_reg_offset(s, a->rn),
4057 vec_full_reg_offset(s, a->rm),
4058 status, vsz, vsz,
4059 a->index * 4 + a->rot,
4060 fns[a->esz - 1]);
4061 tcg_temp_free_ptr(status);
4062 }
4063 return true;
4064}
4065
8092c6a3
RH
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */
4069
4070static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4071 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4072{
4073 if (sve_access_check(s)) {
4074 unsigned vsz = vec_full_reg_size(s);
4075 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4076 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4077 vec_full_reg_offset(s, rn),
4078 pred_full_reg_offset(s, pg),
4079 status, vsz, vsz, 0, fn);
4080 tcg_temp_free_ptr(status);
4081 }
4082 return true;
4083}
4084
46d33d1e
RH
/* FCVT: predicated conversion between FP precisions.  The two-letter
 * suffix appears to encode the precision pair (h=16, s=32, d=64) --
 * TODO confirm direction against the helper definitions.  Only the
 * forms involving fp16 use the fp16 status block.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4114
df4de1af
RH
/* FCVTZS/FCVTZU: predicated FP to signed/unsigned integer conversion,
 * rounding toward zero.  The suffix letters give the FP source size and
 * integer destination size (h=16, s=32, d=64); the fp16 status block is
 * used whenever the FP operand is half precision.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4184
cda3c753
RH
/* Round-to-integral helper table, shared by FRINTI and by the
 * fixed-rounding-mode FRINT* expansions below; indexed by esz - 1.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the current rounding mode.  */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4199
/* FRINTX: round to integral, raising inexact for non-integral inputs.
 * esz == 0 (byte) is a reserved encoding.
 */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4212
/* Expand an FRINT* insn that uses a fixed rounding MODE rather than the
 * mode currently installed in the fpstatus.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        /* set_rmode swaps: it installs the mode held in tmode and leaves
         * the previous rounding mode behind in tmode.
         */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* Second swap restores the original rounding mode.  */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4236
/* FRINTN: round to nearest, ties to even.  */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

/* FRINTP: round toward plus infinity.  */
static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_up);
}

/* FRINTM: round toward minus infinity.  */
static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_down);
}

/* FRINTZ: round toward zero.  */
static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

/* FRINTA: round to nearest, ties away from zero.  */
static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
4261
ec5b375b
RH
/* FRECPX: floating-point reciprocal exponent, predicated.
 * esz == 0 (byte) is a reserved encoding.
 */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: floating-point square root, predicated.
 * esz == 0 (byte) is a reserved encoding.
 */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4287
8092c6a3
RH
/* SCVTF: predicated signed-integer to FP conversion.  The suffix letters
 * give the integer source size and the FP destination size (h=16, s=32,
 * d=64); the fp16 status block is used when the FP result is half
 * precision.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}
4322
/* UCVTF: predicated unsigned-integer to FP conversion; same suffix
 * convention as SCVTF above.
 */
static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4357
d1822297
RH
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
4361
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
4365
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Total memory operations: one per aligned dword, plus one per
     * set bit of the remainder (the remainder is split into
     * power-of-two pieces; see the switch below).
     */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough parts: emit a fully unrolled sequence.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Otherwise emit a TCG-level loop over the aligned portion.  */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* A single power-of-two-sized little-endian load;
             * ctz32(len_remain) yields the matching MO_* size.
             */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 = 4 + 2: a 32-bit load plus a 16-bit load, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4450
5047c204
RH
/* Similarly for stores: copy LEN bytes of register state starting at
 * cpu_env offset VOFS out to guest memory at reg[RN] + IMM.
 * Data moves 8 bytes at a time, little-endian, matching the architectural
 * definition of the transfer as a stream of bytes.
 */
static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Number of 8-byte stores, plus one for any sub-8-byte tail.  */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Few enough parts: emit a straight-line unrolled sequence.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Emit a TCG-level loop.  I is the byte offset, held in a local
         * temp so that it survives the branch back to LOOP.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* One power-of-two sized store covers the whole tail.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Split a 6-byte tail into a 4-byte and a 2-byte store.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4533
d1822297
RH
4534static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4535{
4536 if (sve_access_check(s)) {
4537 int size = vec_full_reg_size(s);
4538 int off = vec_full_reg_offset(s, a->rd);
4539 do_ldr(s, off, size, a->rn, a->imm * size);
4540 }
4541 return true;
4542}
4543
4544static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4545{
4546 if (sve_access_check(s)) {
4547 int size = pred_full_reg_size(s);
4548 int off = pred_full_reg_offset(s, a->rd);
4549 do_ldr(s, off, size, a->rn, a->imm * size);
4550 }
4551 return true;
4552}
c4e7c493 4553
5047c204
RH
4554static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4555{
4556 if (sve_access_check(s)) {
4557 int size = vec_full_reg_size(s);
4558 int off = vec_full_reg_offset(s, a->rd);
4559 do_str(s, off, size, a->rn, a->imm * size);
4560 }
4561 return true;
4562}
4563
4564static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4565{
4566 if (sve_access_check(s)) {
4567 int size = pred_full_reg_size(s);
4568 int off = pred_full_reg_offset(s, a->rd);
4569 do_str(s, off, size, a->rn, a->imm * size);
4570 }
4571 return true;
4572}
4573
c4e7c493
RH
4574/*
4575 *** SVE Memory - Contiguous Load Group
4576 */
4577
/* The memory mode of the dtype, indexed by the instruction's dtype field
 * (16 entries, so presumably a 4-bit field — see the decode tables).
 * The entry gives the access size and sign/zero extension for the load.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The memory access size (log2 bytes) of the dtype.  */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4587
/* The vector element size (log2 bytes) of dtype, indexed like dtype_mop.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4595
4596static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4597 gen_helper_gvec_mem *fn)
4598{
4599 unsigned vsz = vec_full_reg_size(s);
4600 TCGv_ptr t_pg;
4601 TCGv_i32 desc;
4602
4603 /* For e.g. LD4, there are not enough arguments to pass all 4
4604 * registers as pointers, so encode the regno into the data field.
4605 * For consistency, do this even for LD1.
4606 */
4607 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4608 t_pg = tcg_temp_new_ptr();
4609
4610 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4611 fn(cpu_env, t_pg, addr, desc);
4612
4613 tcg_temp_free_ptr(t_pg);
4614 tcg_temp_free_i32(desc);
4615}
4616
/* Emit a predicated contiguous load of 1-4 registers.
 * ZT is the first destination register and PG the governing predicate;
 * ADDR is the already-computed base address.  The helper is selected by
 * [dtype][nreg], where nreg is one less than the register count.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[16][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
          gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
        { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
          gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
        { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
          gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
    };
    gen_helper_gvec_mem *fn = fns[dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4653
4654static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4655{
4656 if (a->rm == 31) {
4657 return false;
4658 }
4659 if (sve_access_check(s)) {
4660 TCGv_i64 addr = new_tmp_a64(s);
4661 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4662 (a->nreg + 1) << dtype_msz(a->dtype));
4663 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4664 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4665 }
4666 return true;
4667}
4668
4669static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4670{
4671 if (sve_access_check(s)) {
4672 int vsz = vec_full_reg_size(s);
4673 int elements = vsz >> dtype_esz[a->dtype];
4674 TCGv_i64 addr = new_tmp_a64(s);
4675
4676 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4677 (a->imm * elements * (a->nreg + 1))
4678 << dtype_msz(a->dtype));
4679 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4680 }
4681 return true;
4682}
e2654d75
RH
4683
/* LDFF1 (first-fault contiguous load), scalar-plus-scalar form.
 * The helper table is indexed by the dtype field; see dtype_mop and
 * dtype_esz above for the meaning of each slot.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldff1bb_r,
        gen_helper_sve_ldff1bhu_r,
        gen_helper_sve_ldff1bsu_r,
        gen_helper_sve_ldff1bdu_r,

        gen_helper_sve_ldff1sds_r,
        gen_helper_sve_ldff1hh_r,
        gen_helper_sve_ldff1hsu_r,
        gen_helper_sve_ldff1hdu_r,

        gen_helper_sve_ldff1hds_r,
        gen_helper_sve_ldff1hss_r,
        gen_helper_sve_ldff1ss_r,
        gen_helper_sve_ldff1sdu_r,

        gen_helper_sve_ldff1bds_r,
        gen_helper_sve_ldff1bss_r,
        gen_helper_sve_ldff1bhs_r,
        gen_helper_sve_ldff1dd_r,
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* addr = rn + (rm << mem_size)  */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
4716
/* LDNF1 (non-fault contiguous load), scalar-plus-immediate form.
 * Helper table indexed by dtype, as for LDFF1 above.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldnf1bb_r,
        gen_helper_sve_ldnf1bhu_r,
        gen_helper_sve_ldnf1bsu_r,
        gen_helper_sve_ldnf1bdu_r,

        gen_helper_sve_ldnf1sds_r,
        gen_helper_sve_ldnf1hh_r,
        gen_helper_sve_ldnf1hsu_r,
        gen_helper_sve_ldnf1hdu_r,

        gen_helper_sve_ldnf1hds_r,
        gen_helper_sve_ldnf1hss_r,
        gen_helper_sve_ldnf1ss_r,
        gen_helper_sve_ldnf1sdu_r,

        gen_helper_sve_ldnf1bds_r,
        gen_helper_sve_ldnf1bss_r,
        gen_helper_sve_ldnf1bhs_r,
        gen_helper_sve_ldnf1dd_r,
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* The immediate is scaled by the vector's element count.  */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
1a039c7e 4752
05abe304
RH
/* Load one 128-bit quadword into ZT under predicate PG, then replicate
 * it across the whole vector.  The load reuses the normal predicated
 * LD1 helpers with the vector length in the descriptor forced to 16.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[4] = {
        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = tcg_const_i32(simd_desc(16, 16, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fns[msz](cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        /* vece 4 here presumably means 16-byte granules -- confirm
         * against the gvec dup_mem implementation.
         */
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4779
4780static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4781{
4782 if (a->rm == 31) {
4783 return false;
4784 }
4785 if (sve_access_check(s)) {
4786 int msz = dtype_msz(a->dtype);
4787 TCGv_i64 addr = new_tmp_a64(s);
4788 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4789 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4790 do_ldrq(s, a->rd, a->pg, addr, msz);
4791 }
4792 return true;
4793}
4794
4795static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4796{
4797 if (sve_access_check(s)) {
4798 TCGv_i64 addr = new_tmp_a64(s);
4799 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4800 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4801 }
4802 return true;
4803}
4804
68459864
RH
/* Load and broadcast element.  Loads a single element from rn + imm and
 * duplicates it to every element of the destination, then zeroes the
 * elements inactive in PG.  Skips the load entirely if no bit of the
 * governing predicate is set.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* The whole predicate fits in one i64: test it directly.
         * Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wider predicate: find the last active element instead;
         * a negative result means none is active.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4852
1a039c7e
RH
/* Emit a predicated contiguous store of 1-4 registers.
 * ZT is the first source register, PG the governing predicate, ADDR the
 * base address.  For ST1 (nreg == 0) the helper is chosen by memory size
 * and element size; for ST2-4 both sizes are equal by encoding.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* Indexed by [msz][esz].  */
    static gen_helper_gvec_mem * const fn_single[4][4] = {
        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
        { NULL, gen_helper_sve_st1hh_r,
          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
        { NULL, NULL,
          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
    };
    /* Indexed by [nreg - 1][msz].  */
    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
    };
    gen_helper_gvec_mem *fn;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4886
4887static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4888{
4889 if (a->rm == 31 || a->msz > a->esz) {
4890 return false;
4891 }
4892 if (sve_access_check(s)) {
4893 TCGv_i64 addr = new_tmp_a64(s);
4894 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4895 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4896 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4897 }
4898 return true;
4899}
4900
4901static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4902{
4903 if (a->msz > a->esz) {
4904 return false;
4905 }
4906 if (sve_access_check(s)) {
4907 int vsz = vec_full_reg_size(s);
4908 int elements = vsz >> a->esz;
4909 TCGv_i64 addr = new_tmp_a64(s);
4910
4911 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4912 (a->imm * elements * (a->nreg + 1)) << a->msz);
4913 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4914 }
4915 return true;
4916}
f6dbf62a
RH
4917
4918/*
4919 *** SVE gather loads / scatter stores
4920 */
4921
4922static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4923 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4924{
4925 unsigned vsz = vec_full_reg_size(s);
4926 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4927 TCGv_ptr t_zm = tcg_temp_new_ptr();
4928 TCGv_ptr t_pg = tcg_temp_new_ptr();
4929 TCGv_ptr t_zt = tcg_temp_new_ptr();
4930
4931 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4932 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4933 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4934 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4935
4936 tcg_temp_free_ptr(t_zt);
4937 tcg_temp_free_ptr(t_zm);
4938 tcg_temp_free_ptr(t_pg);
4939 tcg_temp_free_i32(desc);
4940}
4941
673e9fa6
RH
/* Indexed by [ff][xs][u][msz].
 * ff:  0 = normal, 1 = first-fault (ldff) helper.
 * xs:  offset form (0 = _zsu, 1 = _zss helper variants; presumably
 *      zero- vs sign-extended 32-bit offsets -- see sve_helper.c).
 * u:   0 = sign-extending loads (...ss_), 1 = zero-extending (...su_).
 * msz: log2 of the memory access size; NULL slots are unreachable
 *      encodings.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
    { { { gen_helper_sve_ldbss_zsu,
          gen_helper_sve_ldhss_zsu,
          NULL, },
        { gen_helper_sve_ldbsu_zsu,
          gen_helper_sve_ldhsu_zsu,
          gen_helper_sve_ldssu_zsu, } },
      { { gen_helper_sve_ldbss_zss,
          gen_helper_sve_ldhss_zss,
          NULL, },
        { gen_helper_sve_ldbsu_zss,
          gen_helper_sve_ldhsu_zss,
          gen_helper_sve_ldssu_zss, } } },

    { { { gen_helper_sve_ldffbss_zsu,
          gen_helper_sve_ldffhss_zsu,
          NULL, },
        { gen_helper_sve_ldffbsu_zsu,
          gen_helper_sve_ldffhsu_zsu,
          gen_helper_sve_ldffssu_zsu, } },
      { { gen_helper_sve_ldffbss_zss,
          gen_helper_sve_ldffhss_zss,
          NULL, },
        { gen_helper_sve_ldffbsu_zss,
          gen_helper_sve_ldffhsu_zss,
          gen_helper_sve_ldffssu_zss, } } }
};
4970
/* As gather_load_fn32, for 64-bit elements.
 * Note that we overload xs=2 to indicate 64-bit offset (_zd helpers);
 * xs=0/1 select the 32-bit offset forms (_zsu/_zss).
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
    { { { gen_helper_sve_ldbds_zsu,
          gen_helper_sve_ldhds_zsu,
          gen_helper_sve_ldsds_zsu,
          NULL, },
        { gen_helper_sve_ldbdu_zsu,
          gen_helper_sve_ldhdu_zsu,
          gen_helper_sve_ldsdu_zsu,
          gen_helper_sve_ldddu_zsu, } },
      { { gen_helper_sve_ldbds_zss,
          gen_helper_sve_ldhds_zss,
          gen_helper_sve_ldsds_zss,
          NULL, },
        { gen_helper_sve_ldbdu_zss,
          gen_helper_sve_ldhdu_zss,
          gen_helper_sve_ldsdu_zss,
          gen_helper_sve_ldddu_zss, } },
      { { gen_helper_sve_ldbds_zd,
          gen_helper_sve_ldhds_zd,
          gen_helper_sve_ldsds_zd,
          NULL, },
        { gen_helper_sve_ldbdu_zd,
          gen_helper_sve_ldhdu_zd,
          gen_helper_sve_ldsdu_zd,
          gen_helper_sve_ldddu_zd, } } },

    { { { gen_helper_sve_ldffbds_zsu,
          gen_helper_sve_ldffhds_zsu,
          gen_helper_sve_ldffsds_zsu,
          NULL, },
        { gen_helper_sve_ldffbdu_zsu,
          gen_helper_sve_ldffhdu_zsu,
          gen_helper_sve_ldffsdu_zsu,
          gen_helper_sve_ldffddu_zsu, } },
      { { gen_helper_sve_ldffbds_zss,
          gen_helper_sve_ldffhds_zss,
          gen_helper_sve_ldffsds_zss,
          NULL, },
        { gen_helper_sve_ldffbdu_zss,
          gen_helper_sve_ldffhdu_zss,
          gen_helper_sve_ldffsdu_zss,
          gen_helper_sve_ldffddu_zss, } },
      { { gen_helper_sve_ldffbds_zd,
          gen_helper_sve_ldffhds_zd,
          gen_helper_sve_ldffsds_zd,
          NULL, },
        { gen_helper_sve_ldffbdu_zd,
          gen_helper_sve_ldffhdu_zd,
          gen_helper_sve_ldffsdu_zd,
          gen_helper_sve_ldffddu_zd, } } }
};
5023
5024static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5025{
5026 gen_helper_gvec_mem_scatter *fn = NULL;
5027
5028 if (!sve_access_check(s)) {
5029 return true;
5030 }
5031
5032 switch (a->esz) {
5033 case MO_32:
5034 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5035 break;
5036 case MO_64:
5037 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5038 break;
5039 }
5040 assert(fn != NULL);
5041
5042 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5043 cpu_reg_sp(s, a->rn), fn);
5044 return true;
5045}
5046
5047static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5048{
5049 gen_helper_gvec_mem_scatter *fn = NULL;
5050 TCGv_i64 imm;
5051
5052 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5053 return false;
5054 }
5055 if (!sve_access_check(s)) {
5056 return true;
5057 }
5058
5059 switch (a->esz) {
5060 case MO_32:
5061 fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5062 break;
5063 case MO_64:
5064 fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5065 break;
5066 }
5067 assert(fn != NULL);
5068
5069 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5070 * by loading the immediate into the scalar parameter.
5071 */
5072 imm = tcg_const_i64(a->imm << a->msz);
5073 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5074 tcg_temp_free_i64(imm);
5075 return true;
5076}
5077
408ecde9
RH
/* Indexed by [xs][msz]: xs selects the offset form (_zsu vs _zss helper
 * variants), msz the log2 memory access size.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
    { gen_helper_sve_stbs_zsu,
      gen_helper_sve_sths_zsu,
      gen_helper_sve_stss_zsu, },
    { gen_helper_sve_stbs_zss,
      gen_helper_sve_sths_zss,
      gen_helper_sve_stss_zss, },
};
5087
/* As scatter_store_fn32, for 64-bit elements.
 * Note that we overload xs=2 to indicate 64-bit offset (_zd helpers).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
    { gen_helper_sve_stbd_zsu,
      gen_helper_sve_sthd_zsu,
      gen_helper_sve_stsd_zsu,
      gen_helper_sve_stdd_zsu, },
    { gen_helper_sve_stbd_zss,
      gen_helper_sve_sthd_zss,
      gen_helper_sve_stsd_zss,
      gen_helper_sve_stdd_zss, },
    { gen_helper_sve_stbd_zd,
      gen_helper_sve_sthd_zd,
      gen_helper_sve_stsd_zd,
      gen_helper_sve_stdd_zd, },
};
5103
f6dbf62a
RH
5104static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5105{
f6dbf62a
RH
5106 gen_helper_gvec_mem_scatter *fn;
5107
5108 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5109 return false;
5110 }
5111 if (!sve_access_check(s)) {
5112 return true;
5113 }
5114 switch (a->esz) {
5115 case MO_32:
408ecde9 5116 fn = scatter_store_fn32[a->xs][a->msz];
f6dbf62a
RH
5117 break;
5118 case MO_64:
408ecde9 5119 fn = scatter_store_fn64[a->xs][a->msz];
f6dbf62a
RH
5120 break;
5121 default:
5122 g_assert_not_reached();
5123 }
5124 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5125 cpu_reg_sp(s, a->rn), fn);
5126 return true;
5127}
dec6cf6b 5128
408ecde9
RH
5129static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5130{
5131 gen_helper_gvec_mem_scatter *fn = NULL;
5132 TCGv_i64 imm;
5133
5134 if (a->esz < a->msz) {
5135 return false;
5136 }
5137 if (!sve_access_check(s)) {
5138 return true;
5139 }
5140
5141 switch (a->esz) {
5142 case MO_32:
5143 fn = scatter_store_fn32[0][a->msz];
5144 break;
5145 case MO_64:
5146 fn = scatter_store_fn64[2][a->msz];
5147 break;
5148 }
5149 assert(fn != NULL);
5150
5151 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5152 * by loading the immediate into the scalar parameter.
5153 */
5154 imm = tcg_const_i64(a->imm << a->msz);
5155 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5156 tcg_temp_free_i64(imm);
5157 return true;
5158}
5159
dec6cf6b
RH
5160/*
5161 * Prefetches
5162 */
5163
5164static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5165{
5166 /* Prefetch is a nop within QEMU. */
5167 sve_access_check(s);
5168 return true;
5169}
5170
5171static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5172{
5173 if (a->rm == 31) {
5174 return false;
5175 }
5176 /* Prefetch is a nop within QEMU. */
5177 sve_access_check(s);
5178 return true;
5179}
a2103582
RH
5180
5181/*
5182 * Move Prefix
5183 *
5184 * TODO: The implementation so far could handle predicated merging movprfx.
5185 * The helper functions as written take an extra source register to
5186 * use in the operation, but the result is only written when predication
5187 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5188 * to allow the final write back to the destination to be unconditional.
5189 * For predicated zeroing movprfx, we need to rearrange the helpers to
5190 * allow the final write back to zero inactives.
5191 *
5192 * In the meantime, just emit the moves.
5193 */
5194
/* Unpredicated movprfx: per the comment block above, implemented as a
 * plain whole-vector move.
 */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
{
    return do_mov_z(s, a->rd, a->rn);
}
5199
5200static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5201{
5202 if (sve_access_check(s)) {
5203 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5204 }
5205 return true;
5206}
5207
5208static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5209{
5210 if (sve_access_check(s)) {
5211 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5212 }
5213 return true;
5214}