/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

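/*
 * Worked example (illustrative, not from the architecture text): for a
 * predicated byte-sized shift the 7-bit field is tsz:imm3 with tsz = 1,
 * so x ranges over 8..15.  Then tszimm_esz(13) discards imm3 leaving
 * tsz = 1, giving esz = 0, and tszimm_shr(13) = (16 << 0) - 13 = 3,
 * i.e. shift right by 3 on byte elements.
 */
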
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

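/*
 * For example (illustrative): x = 0x180 has SH set with imm8 = 0x80, so
 * expand_imm_sh8s above yields (int8_t)0x80 << 8 = -32768, while
 * expand_imm_sh8u yields 0x80 << 8 = 0x8000.
 */
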
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 * Note that the table below is simply dtype == msz * 5, picking out
 * the unsigned, same-size entries of the dtype encoding.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

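/*
 * For example (illustrative): with a 128-bit vector length the predicate
 * registers are VL / 64 = 2 bytes, which size_for_gvec rounds up to the
 * 8-byte minimum that the gvec expanders accept.
 */
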
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
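    /*
     * The helper's result packs the flags as N in bit 31, "not Z" in
     * bit 1 and C in bit 0 (V is always clear); recall that QEMU's
     * cpu_ZF reads as Z set exactly when it is zero.
     */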
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* Note that t carries the word count in and the flags result out.  */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

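/*
 * For instance, DO_ZPZZ(AND, and) below expands to trans_AND_zpzz,
 * which dispatches to gen_helper_sve_and_zpzz_{b,h,s,d} by element size.
 */
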
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_zpz_ool(s, a, fns[a->esz]);                                 \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_reduc * const fns[4] = {                       \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_vpz_ool(s, a, fns[a->esz]);                                 \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for
 * two- and three-operand predicated instructions for which logic
 * dictates a zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.  */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                         \
    static gen_helper_gvec_5 * const fns[4] = {                           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,             \
    };                                                                    \
    return do_zpzzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

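/*
 * Note on the unflagged special cases below: Pn & Pn == Pn, so with
 * Rn == Rm the operation collapses to Pd = Pn & Pg (or to a plain move
 * when Pg is also Rn), and when Pg equals either operand the guard is
 * already subsumed by the AND itself.
 */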
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

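/*
 * Worked example for decode_pred_count above (illustrative): with a
 * 256-bit vector and esz = 2 there are 8 word elements, so POW2 and ALL
 * both give 8, VL7 gives 7, MUL3 gives 8 - 8 % 3 = 6, and VL16 gives 0
 * because the bound exceeds the element count.
 */
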
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

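/*
 * The signed cases above use the usual two's-complement identity: an
 * addition overflows iff both operands share a sign and the result's
 * sign differs (for subtraction, iff the operand signs differ and the
 * result's sign differs from the minuend), so after the xors the sign
 * bit of t0 is set exactly when the result must be clamped.
 */
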
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

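/*
 * A note on the decode below: logic_imm_decode_wmask is the standard
 * A64 bitmask-immediate decoder shared with the base instruction set;
 * a false return means the dbm field is not a valid encoding.
 */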
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

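/*
 * Worked example (illustrative): with a 32-byte vector and imm = 16,
 * EXT produces bytes 16..31 of Zn followed by bytes 0..15 of Zm; both
 * pieces are gvec-sized, so the fast path below emits two plain vector
 * moves.
 */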
static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

2020 /*
2021 *** SVE Permute - Unpredicated Group
2022 */
2023
2024 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2025 {
2026 if (sve_access_check(s)) {
2027 unsigned vsz = vec_full_reg_size(s);
2028 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2029 vsz, vsz, cpu_reg_sp(s, a->rn));
2030 }
2031 return true;
2032 }
2033
2034 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2035 {
2036 if ((a->imm & 0x1f) == 0) {
2037 return false;
2038 }
2039 if (sve_access_check(s)) {
2040 unsigned vsz = vec_full_reg_size(s);
2041 unsigned dofs = vec_full_reg_offset(s, a->rd);
2042 unsigned esz, index;
2043
2044 esz = ctz32(a->imm);
2045 index = a->imm >> (esz + 1);
2046
2047 if ((index << esz) < vsz) {
2048 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2049 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2050 } else {
2051 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2052 }
2053 }
2054 return true;
2055 }
2056
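/* INSR shifts ZN up by one element and inserts VAL at element 0.
 * This expansion is shared by the scalar and SIMD&FP forms below.
 */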
2057 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058 {
2059 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060 static gen_insr * const fns[4] = {
2061 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063 };
2064 unsigned vsz = vec_full_reg_size(s);
2065 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066 TCGv_ptr t_zd = tcg_temp_new_ptr();
2067 TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072 fns[a->esz](t_zd, t_zn, val, desc);
2073
2074 tcg_temp_free_ptr(t_zd);
2075 tcg_temp_free_ptr(t_zn);
2076 tcg_temp_free_i32(desc);
2077 }
2078
2079 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2080 {
2081 if (sve_access_check(s)) {
2082 TCGv_i64 t = tcg_temp_new_i64();
2083 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084 do_insr_i64(s, a, t);
2085 tcg_temp_free_i64(t);
2086 }
2087 return true;
2088 }
2089
2090 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2091 {
2092 if (sve_access_check(s)) {
2093 do_insr_i64(s, a, cpu_reg(s, a->rm));
2094 }
2095 return true;
2096 }
2097
2098 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2099 {
2100 static gen_helper_gvec_2 * const fns[4] = {
2101 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103 };
2104
2105 if (sve_access_check(s)) {
2106 unsigned vsz = vec_full_reg_size(s);
2107 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108 vec_full_reg_offset(s, a->rn),
2109 vsz, vsz, 0, fns[a->esz]);
2110 }
2111 return true;
2112 }
2113
2114 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2115 {
2116 static gen_helper_gvec_3 * const fns[4] = {
2117 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119 };
2120
2121 if (sve_access_check(s)) {
2122 unsigned vsz = vec_full_reg_size(s);
2123 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124 vec_full_reg_offset(s, a->rn),
2125 vec_full_reg_offset(s, a->rm),
2126 vsz, vsz, 0, fns[a->esz]);
2127 }
2128 return true;
2129 }
2130
2131 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2132 {
2133 static gen_helper_gvec_2 * const fns[4][2] = {
2134 { NULL, NULL },
2135 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138 };
2139
2140 if (a->esz == 0) {
2141 return false;
2142 }
2143 if (sve_access_check(s)) {
2144 unsigned vsz = vec_full_reg_size(s);
2145 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146 vec_full_reg_offset(s, a->rn)
2147 + (a->h ? vsz / 2 : 0),
2148 vsz, vsz, 0, fns[a->esz][a->u]);
2149 }
2150 return true;
2151 }
2152
2153 /*
2154 *** SVE Permute - Predicates Group
2155 */
2156
2157 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158 gen_helper_gvec_3 *fn)
2159 {
2160 if (!sve_access_check(s)) {
2161 return true;
2162 }
2163
2164 unsigned vsz = pred_full_reg_size(s);
2165
2166 /* Predicate sizes may be smaller and cannot use simd_desc.
2167 We cannot round up, as we do elsewhere, because we need
2168 the exact size for ZIP2 and REV. We retain the style of
2169 the other helpers for consistency. */
2170 TCGv_ptr t_d = tcg_temp_new_ptr();
2171 TCGv_ptr t_n = tcg_temp_new_ptr();
2172 TCGv_ptr t_m = tcg_temp_new_ptr();
2173 TCGv_i32 t_desc;
2174 int desc;
2175
2176 desc = vsz - 2;
2177 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183 t_desc = tcg_const_i32(desc);
2184
2185 fn(t_d, t_n, t_m, t_desc);
2186
2187 tcg_temp_free_ptr(t_d);
2188 tcg_temp_free_ptr(t_n);
2189 tcg_temp_free_ptr(t_m);
2190 tcg_temp_free_i32(t_desc);
2191 return true;
2192 }
2193
2194 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195 gen_helper_gvec_2 *fn)
2196 {
2197 if (!sve_access_check(s)) {
2198 return true;
2199 }
2200
2201 unsigned vsz = pred_full_reg_size(s);
2202 TCGv_ptr t_d = tcg_temp_new_ptr();
2203 TCGv_ptr t_n = tcg_temp_new_ptr();
2204 TCGv_i32 t_desc;
2205 int desc;
2206
2207 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210 /* Predicate sizes may be smaller and cannot use simd_desc.
2211 We cannot round up, as we do elsewhere, because we need
2212 the exact size for ZIP2 and REV. We retain the style of
2213 the other helpers for consistency. */
2214
2215 desc = vsz - 2;
2216 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218 t_desc = tcg_const_i32(desc);
2219
2220 fn(t_d, t_n, t_desc);
2221
2222 tcg_temp_free_i32(t_desc);
2223 tcg_temp_free_ptr(t_d);
2224 tcg_temp_free_ptr(t_n);
2225 return true;
2226 }
2227
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229 {
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231 }
2232
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234 {
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236 }
2237
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2239 {
2240 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241 }
2242
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2244 {
2245 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246 }
2247
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2249 {
2250 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251 }
2252
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2254 {
2255 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256 }
2257
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2259 {
2260 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261 }
2262
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2264 {
2265 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266 }
2267
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2269 {
2270 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271 }
2272
2273 /*
2274 *** SVE Permute - Interleaving Group
2275 */
2276
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278 {
2279 static gen_helper_gvec_3 * const fns[4] = {
2280 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282 };
2283
2284 if (sve_access_check(s)) {
2285 unsigned vsz = vec_full_reg_size(s);
2286 unsigned high_ofs = high ? vsz / 2 : 0;
2287 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288 vec_full_reg_offset(s, a->rn) + high_ofs,
2289 vec_full_reg_offset(s, a->rm) + high_ofs,
2290 vsz, vsz, 0, fns[a->esz]);
2291 }
2292 return true;
2293 }
2294
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296 gen_helper_gvec_3 *fn)
2297 {
2298 if (sve_access_check(s)) {
2299 unsigned vsz = vec_full_reg_size(s);
2300 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301 vec_full_reg_offset(s, a->rn),
2302 vec_full_reg_offset(s, a->rm),
2303 vsz, vsz, data, fn);
2304 }
2305 return true;
2306 }
2307
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309 {
2310 return do_zip(s, a, false);
2311 }
2312
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2314 {
2315 return do_zip(s, a, true);
2316 }
2317
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321 };
2322
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324 {
2325 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326 }
2327
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2329 {
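/* UZP2 gathers the odd-numbered elements; the data argument is
 * presumably the starting byte offset, i.e. that of element 1.
 */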
2330 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331 }
2332
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336 };
2337
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2339 {
2340 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341 }
2342
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2344 {
2345 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346 }
2347
2348 /*
2349 *** SVE Permute Vector - Predicated Group
2350 */
2351
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2353 {
2354 static gen_helper_gvec_3 * const fns[4] = {
2355 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356 };
2357 return do_zpz_ool(s, a, fns[a->esz]);
2358 }
2359
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361 * function, scaled by the element size. This includes the not found
2362 * indication; e.g. not found for esz=3 is -8.
2363 */
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365 {
2366 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2367 * round up, as we do elsewhere, because we need the exact size.
2368 */
2369 TCGv_ptr t_p = tcg_temp_new_ptr();
2370 TCGv_i32 t_desc;
2371 unsigned vsz = pred_full_reg_size(s);
2372 unsigned desc;
2373
2374 desc = vsz - 2;
2375 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
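/* E.g. for a 256-bit vector length the predicate is 32 bytes, giving
 * desc = 30 with ESZ in the SIMD data field.
 */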
2376
2377 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378 t_desc = tcg_const_i32(desc);
2379
2380 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382 tcg_temp_free_i32(t_desc);
2383 tcg_temp_free_ptr(t_p);
2384 }
2385
2386 /* Increment LAST to the offset of the next element in the vector,
2387 * wrapping around to 0.
2388 */
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391 unsigned vsz = vec_full_reg_size(s);
2392
2393 tcg_gen_addi_i32(last, last, 1 << esz);
2394 if (is_power_of_2(vsz)) {
2395 tcg_gen_andi_i32(last, last, vsz - 1);
2396 } else {
2397 TCGv_i32 max = tcg_const_i32(vsz);
2398 TCGv_i32 zero = tcg_const_i32(0);
2399 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400 tcg_temp_free_i32(max);
2401 tcg_temp_free_i32(zero);
2402 }
2403 }
2404
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407 {
2408 unsigned vsz = vec_full_reg_size(s);
2409
2410 if (is_power_of_2(vsz)) {
2411 tcg_gen_andi_i32(last, last, vsz - 1);
2412 } else {
2413 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414 TCGv_i32 zero = tcg_const_i32(0);
2415 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416 tcg_temp_free_i32(max);
2417 tcg_temp_free_i32(zero);
2418 }
2419 }
2420
2421 /* Load an unsigned element of ESZ from BASE+OFS. */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423 {
2424 TCGv_i64 r = tcg_temp_new_i64();
2425
2426 switch (esz) {
2427 case 0:
2428 tcg_gen_ld8u_i64(r, base, ofs);
2429 break;
2430 case 1:
2431 tcg_gen_ld16u_i64(r, base, ofs);
2432 break;
2433 case 2:
2434 tcg_gen_ld32u_i64(r, base, ofs);
2435 break;
2436 case 3:
2437 tcg_gen_ld_i64(r, base, ofs);
2438 break;
2439 default:
2440 g_assert_not_reached();
2441 }
2442 return r;
2443 }
2444
2445 /* Load an unsigned element of ESZ from RM[LAST]. */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447 int rm, int esz)
2448 {
2449 TCGv_ptr p = tcg_temp_new_ptr();
2450 TCGv_i64 r;
2451
2452 /* Convert the offset within the vector into an offset into ENV.
2453 * The final adjustment for the vector register base
2454 * is added via a constant offset to the load.
2455 */
2456 #ifdef HOST_WORDS_BIGENDIAN
2457 /* Adjust for element ordering. See vec_reg_offset. */
2458 if (esz < 3) {
2459 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460 }
2461 #endif
2462 tcg_gen_ext_i32_ptr(p, last);
2463 tcg_gen_add_ptr(p, p, cpu_env);
2464
2465 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466 tcg_temp_free_ptr(p);
2467
2468 return r;
2469 }
2470
2471 /* Compute CLAST for a Zreg. */
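/* CLASTB broadcasts the last active element of ZM to all of ZD, while
 * CLASTA (BEFORE == false) uses the element after the last active one.
 * If no element is active, ZD is filled from ZN instead.
 */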
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473 {
2474 TCGv_i32 last;
2475 TCGLabel *over;
2476 TCGv_i64 ele;
2477 unsigned vsz, esz = a->esz;
2478
2479 if (!sve_access_check(s)) {
2480 return true;
2481 }
2482
2483 last = tcg_temp_local_new_i32();
2484 over = gen_new_label();
2485
2486 find_last_active(s, last, esz, a->pg);
2487
2488 /* There is of course no movcond for a 2048-bit vector,
2489 * so we must branch over the actual store.
2490 */
2491 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493 if (!before) {
2494 incr_last_active(s, last, esz);
2495 }
2496
2497 ele = load_last_active(s, last, a->rm, esz);
2498 tcg_temp_free_i32(last);
2499
2500 vsz = vec_full_reg_size(s);
2501 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502 tcg_temp_free_i64(ele);
2503
2504 /* If this insn used MOVPRFX, we may need a second move. */
2505 if (a->rd != a->rn) {
2506 TCGLabel *done = gen_new_label();
2507 tcg_gen_br(done);
2508
2509 gen_set_label(over);
2510 do_mov_z(s, a->rd, a->rn);
2511
2512 gen_set_label(done);
2513 } else {
2514 gen_set_label(over);
2515 }
2516 return true;
2517 }
2518
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2520 {
2521 return do_clast_vector(s, a, false);
2522 }
2523
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2525 {
2526 return do_clast_vector(s, a, true);
2527 }
2528
2529 /* Compute CLAST for a scalar. */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531 bool before, TCGv_i64 reg_val)
2532 {
2533 TCGv_i32 last = tcg_temp_new_i32();
2534 TCGv_i64 ele, cmp, zero;
2535
2536 find_last_active(s, last, esz, pg);
2537
2538 /* Extend the original value of last prior to incrementing. */
2539 cmp = tcg_temp_new_i64();
2540 tcg_gen_ext_i32_i64(cmp, last);
2541
2542 if (!before) {
2543 incr_last_active(s, last, esz);
2544 }
2545
2546 /* The conceit here is that while last < 0 indicates not found, after
2547 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548 * from which we can load garbage. We then discard the garbage with
2549 * a conditional move.
2550 */
2551 ele = load_last_active(s, last, rm, esz);
2552 tcg_temp_free_i32(last);
2553
2554 zero = tcg_const_i64(0);
2555 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557 tcg_temp_free_i64(zero);
2558 tcg_temp_free_i64(cmp);
2559 tcg_temp_free_i64(ele);
2560 }
2561
2562 /* Compute CLAST for a Vreg. */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564 {
2565 if (sve_access_check(s)) {
2566 int esz = a->esz;
2567 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571 write_fp_dreg(s, a->rd, reg);
2572 tcg_temp_free_i64(reg);
2573 }
2574 return true;
2575 }
2576
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578 {
2579 return do_clast_fp(s, a, false);
2580 }
2581
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2583 {
2584 return do_clast_fp(s, a, true);
2585 }
2586
2587 /* Compute CLAST for a Xreg. */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589 {
2590 TCGv_i64 reg;
2591
2592 if (!sve_access_check(s)) {
2593 return true;
2594 }
2595
2596 reg = cpu_reg(s, a->rd);
2597 switch (a->esz) {
2598 case 0:
2599 tcg_gen_ext8u_i64(reg, reg);
2600 break;
2601 case 1:
2602 tcg_gen_ext16u_i64(reg, reg);
2603 break;
2604 case 2:
2605 tcg_gen_ext32u_i64(reg, reg);
2606 break;
2607 case 3:
2608 break;
2609 default:
2610 g_assert_not_reached();
2611 }
2612
2613 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614 return true;
2615 }
2616
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618 {
2619 return do_clast_general(s, a, false);
2620 }
2621
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2623 {
2624 return do_clast_general(s, a, true);
2625 }
2626
2627 /* Compute LAST for a scalar. */
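/* LASTB extracts the last active element, LASTA the element after it.
 * With no active elements the index wraps: LASTA reads element 0 and
 * LASTB the final element of the vector.
 */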
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629 int pg, int rm, bool before)
2630 {
2631 TCGv_i32 last = tcg_temp_new_i32();
2632 TCGv_i64 ret;
2633
2634 find_last_active(s, last, esz, pg);
2635 if (before) {
2636 wrap_last_active(s, last, esz);
2637 } else {
2638 incr_last_active(s, last, esz);
2639 }
2640
2641 ret = load_last_active(s, last, rm, esz);
2642 tcg_temp_free_i32(last);
2643 return ret;
2644 }
2645
2646 /* Compute LAST for a Vreg. */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648 {
2649 if (sve_access_check(s)) {
2650 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651 write_fp_dreg(s, a->rd, val);
2652 tcg_temp_free_i64(val);
2653 }
2654 return true;
2655 }
2656
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2658 {
2659 return do_last_fp(s, a, false);
2660 }
2661
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664 return do_last_fp(s, a, true);
2665 }
2666
2667 /* Compute LAST for a Xreg. */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669 {
2670 if (sve_access_check(s)) {
2671 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673 tcg_temp_free_i64(val);
2674 }
2675 return true;
2676 }
2677
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679 {
2680 return do_last_general(s, a, false);
2681 }
2682
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2684 {
2685 return do_last_general(s, a, true);
2686 }
2687
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2689 {
2690 if (sve_access_check(s)) {
2691 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692 }
2693 return true;
2694 }
2695
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2697 {
2698 if (sve_access_check(s)) {
2699 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702 tcg_temp_free_i64(t);
2703 }
2704 return true;
2705 }
2706
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2708 {
2709 static gen_helper_gvec_3 * const fns[4] = {
2710 NULL,
2711 gen_helper_sve_revb_h,
2712 gen_helper_sve_revb_s,
2713 gen_helper_sve_revb_d,
2714 };
2715 return do_zpz_ool(s, a, fns[a->esz]);
2716 }
2717
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2719 {
2720 static gen_helper_gvec_3 * const fns[4] = {
2721 NULL,
2722 NULL,
2723 gen_helper_sve_revh_s,
2724 gen_helper_sve_revh_d,
2725 };
2726 return do_zpz_ool(s, a, fns[a->esz]);
2727 }
2728
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2730 {
2731 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732 }
2733
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2735 {
2736 static gen_helper_gvec_3 * const fns[4] = {
2737 gen_helper_sve_rbit_b,
2738 gen_helper_sve_rbit_h,
2739 gen_helper_sve_rbit_s,
2740 gen_helper_sve_rbit_d,
2741 };
2742 return do_zpz_ool(s, a, fns[a->esz]);
2743 }
2744
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2746 {
2747 if (sve_access_check(s)) {
2748 unsigned vsz = vec_full_reg_size(s);
2749 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750 vec_full_reg_offset(s, a->rn),
2751 vec_full_reg_offset(s, a->rm),
2752 pred_full_reg_offset(s, a->pg),
2753 vsz, vsz, a->esz, gen_helper_sve_splice);
2754 }
2755 return true;
2756 }
2757
2758 /*
2759 *** SVE Integer Compare - Vectors Group
2760 */
2761
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763 gen_helper_gvec_flags_4 *gen_fn)
2764 {
2765 TCGv_ptr pd, zn, zm, pg;
2766 unsigned vsz;
2767 TCGv_i32 t;
2768
2769 if (gen_fn == NULL) {
2770 return false;
2771 }
2772 if (!sve_access_check(s)) {
2773 return true;
2774 }
2775
2776 vsz = vec_full_reg_size(s);
2777 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778 pd = tcg_temp_new_ptr();
2779 zn = tcg_temp_new_ptr();
2780 zm = tcg_temp_new_ptr();
2781 pg = tcg_temp_new_ptr();
2782
2783 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
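/* T does double duty here: it passes the descriptor into the helper
 * and returns the PREDTEST flags consumed by do_pred_flags below.
 */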
2788 gen_fn(t, pd, zn, zm, pg, t);
2789
2790 tcg_temp_free_ptr(pd);
2791 tcg_temp_free_ptr(zn);
2792 tcg_temp_free_ptr(zm);
2793 tcg_temp_free_ptr(pg);
2794
2795 do_pred_flags(t);
2796
2797 tcg_temp_free_i32(t);
2798 return true;
2799 }
2800
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2803 uint32_t insn) \
2804 { \
2805 static gen_helper_gvec_flags_4 * const fns[4] = { \
2806 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2807 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2808 }; \
2809 return do_ppzz_flags(s, a, fns[a->esz]); \
2810 }
2811
2812 DO_PPZZ(CMPEQ, cmpeq)
2813 DO_PPZZ(CMPNE, cmpne)
2814 DO_PPZZ(CMPGT, cmpgt)
2815 DO_PPZZ(CMPGE, cmpge)
2816 DO_PPZZ(CMPHI, cmphi)
2817 DO_PPZZ(CMPHS, cmphs)
2818
2819 #undef DO_PPZZ
2820
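/* The wide forms compare b/h/s elements of ZN against the 64-bit
 * elements of ZM, so there is no d-sized form; hence the NULL entry
 * in each function table below.
 */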
2821 #define DO_PPZW(NAME, name) \
2822 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2823 uint32_t insn) \
2824 { \
2825 static gen_helper_gvec_flags_4 * const fns[4] = { \
2826 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2827 gen_helper_sve_##name##_ppzw_s, NULL \
2828 }; \
2829 return do_ppzz_flags(s, a, fns[a->esz]); \
2830 }
2831
2832 DO_PPZW(CMPEQ, cmpeq)
2833 DO_PPZW(CMPNE, cmpne)
2834 DO_PPZW(CMPGT, cmpgt)
2835 DO_PPZW(CMPGE, cmpge)
2836 DO_PPZW(CMPHI, cmphi)
2837 DO_PPZW(CMPHS, cmphs)
2838 DO_PPZW(CMPLT, cmplt)
2839 DO_PPZW(CMPLE, cmple)
2840 DO_PPZW(CMPLO, cmplo)
2841 DO_PPZW(CMPLS, cmpls)
2842
2843 #undef DO_PPZW
2844
2845 /*
2846 *** SVE Integer Compare - Immediate Groups
2847 */
2848
2849 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2850 gen_helper_gvec_flags_3 *gen_fn)
2851 {
2852 TCGv_ptr pd, zn, pg;
2853 unsigned vsz;
2854 TCGv_i32 t;
2855
2856 if (gen_fn == NULL) {
2857 return false;
2858 }
2859 if (!sve_access_check(s)) {
2860 return true;
2861 }
2862
2863 vsz = vec_full_reg_size(s);
2864 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2865 pd = tcg_temp_new_ptr();
2866 zn = tcg_temp_new_ptr();
2867 pg = tcg_temp_new_ptr();
2868
2869 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2870 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2871 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2872
2873 gen_fn(t, pd, zn, pg, t);
2874
2875 tcg_temp_free_ptr(pd);
2876 tcg_temp_free_ptr(zn);
2877 tcg_temp_free_ptr(pg);
2878
2879 do_pred_flags(t);
2880
2881 tcg_temp_free_i32(t);
2882 return true;
2883 }
2884
2885 #define DO_PPZI(NAME, name) \
2886 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2887 uint32_t insn) \
2888 { \
2889 static gen_helper_gvec_flags_3 * const fns[4] = { \
2890 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2891 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2892 }; \
2893 return do_ppzi_flags(s, a, fns[a->esz]); \
2894 }
2895
2896 DO_PPZI(CMPEQ, cmpeq)
2897 DO_PPZI(CMPNE, cmpne)
2898 DO_PPZI(CMPGT, cmpgt)
2899 DO_PPZI(CMPGE, cmpge)
2900 DO_PPZI(CMPHI, cmphi)
2901 DO_PPZI(CMPHS, cmphs)
2902 DO_PPZI(CMPLT, cmplt)
2903 DO_PPZI(CMPLE, cmple)
2904 DO_PPZI(CMPLO, cmplo)
2905 DO_PPZI(CMPLS, cmpls)
2906
2907 #undef DO_PPZI
2908
2909 /*
2910 *** SVE Partition Break Group
2911 */
2912
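/* The plain and flag-setting BRK variants share a translator: when
 * a->s is set we invoke the *S helper and update NZCV via
 * do_pred_flags.
 */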
2913 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2914 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2915 {
2916 if (!sve_access_check(s)) {
2917 return true;
2918 }
2919
2920 unsigned vsz = pred_full_reg_size(s);
2921
2922 /* Predicate sizes may be smaller and cannot use simd_desc. */
2923 TCGv_ptr d = tcg_temp_new_ptr();
2924 TCGv_ptr n = tcg_temp_new_ptr();
2925 TCGv_ptr m = tcg_temp_new_ptr();
2926 TCGv_ptr g = tcg_temp_new_ptr();
2927 TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2932 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2933
2934 if (a->s) {
2935 fn_s(t, d, n, m, g, t);
2936 do_pred_flags(t);
2937 } else {
2938 fn(d, n, m, g, t);
2939 }
2940 tcg_temp_free_ptr(d);
2941 tcg_temp_free_ptr(n);
2942 tcg_temp_free_ptr(m);
2943 tcg_temp_free_ptr(g);
2944 tcg_temp_free_i32(t);
2945 return true;
2946 }
2947
2948 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2949 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2950 {
2951 if (!sve_access_check(s)) {
2952 return true;
2953 }
2954
2955 unsigned vsz = pred_full_reg_size(s);
2956
2957 /* Predicate sizes may be smaller and cannot use simd_desc. */
2958 TCGv_ptr d = tcg_temp_new_ptr();
2959 TCGv_ptr n = tcg_temp_new_ptr();
2960 TCGv_ptr g = tcg_temp_new_ptr();
2961 TCGv_i32 t = tcg_const_i32(vsz - 2);
2962
2963 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2964 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2965 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2966
2967 if (a->s) {
2968 fn_s(t, d, n, g, t);
2969 do_pred_flags(t);
2970 } else {
2971 fn(d, n, g, t);
2972 }
2973 tcg_temp_free_ptr(d);
2974 tcg_temp_free_ptr(n);
2975 tcg_temp_free_ptr(g);
2976 tcg_temp_free_i32(t);
2977 return true;
2978 }
2979
2980 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2981 {
2982 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2983 }
2984
2985 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2986 {
2987 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2988 }
2989
2990 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2991 {
2992 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2993 }
2994
2995 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2996 {
2997 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2998 }
2999
3000 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3001 {
3002 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3003 }
3004
3005 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3006 {
3007 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3008 }
3009
3010 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3011 {
3012 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3013 }
3014
3015 /*
3016 *** SVE Predicate Count Group
3017 */
3018
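/* Count into VAL the active elements of size ESZ within PN & PG.
 * When the whole predicate fits in a single i64 we mask and popcount
 * inline rather than calling the helper.
 */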
3019 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3020 {
3021 unsigned psz = pred_full_reg_size(s);
3022
3023 if (psz <= 8) {
3024 uint64_t psz_mask;
3025
3026 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3027 if (pn != pg) {
3028 TCGv_i64 g = tcg_temp_new_i64();
3029 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3030 tcg_gen_and_i64(val, val, g);
3031 tcg_temp_free_i64(g);
3032 }
3033
3034 /* Reduce the pred_esz_masks value simply to reduce the
3035 * size of the code generated here.
3036 */
3037 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3038 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3039
3040 tcg_gen_ctpop_i64(val, val);
3041 } else {
3042 TCGv_ptr t_pn = tcg_temp_new_ptr();
3043 TCGv_ptr t_pg = tcg_temp_new_ptr();
3044 unsigned desc;
3045 TCGv_i32 t_desc;
3046
3047 desc = psz - 2;
3048 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3049
3050 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3051 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3052 t_desc = tcg_const_i32(desc);
3053
3054 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3055 tcg_temp_free_ptr(t_pn);
3056 tcg_temp_free_ptr(t_pg);
3057 tcg_temp_free_i32(t_desc);
3058 }
3059 }
3060
3061 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3062 {
3063 if (sve_access_check(s)) {
3064 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3065 }
3066 return true;
3067 }
3068
3069 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3070 uint32_t insn)
3071 {
3072 if (sve_access_check(s)) {
3073 TCGv_i64 reg = cpu_reg(s, a->rd);
3074 TCGv_i64 val = tcg_temp_new_i64();
3075
3076 do_cntp(s, val, a->esz, a->pg, a->pg);
3077 if (a->d) {
3078 tcg_gen_sub_i64(reg, reg, val);
3079 } else {
3080 tcg_gen_add_i64(reg, reg, val);
3081 }
3082 tcg_temp_free_i64(val);
3083 }
3084 return true;
3085 }
3086
3087 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3088 uint32_t insn)
3089 {
3090 if (a->esz == 0) {
3091 return false;
3092 }
3093 if (sve_access_check(s)) {
3094 unsigned vsz = vec_full_reg_size(s);
3095 TCGv_i64 val = tcg_temp_new_i64();
3096 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3097
3098 do_cntp(s, val, a->esz, a->pg, a->pg);
3099 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3100 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3101 }
3102 return true;
3103 }
3104
3105 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3106 uint32_t insn)
3107 {
3108 if (sve_access_check(s)) {
3109 TCGv_i64 reg = cpu_reg(s, a->rd);
3110 TCGv_i64 val = tcg_temp_new_i64();
3111
3112 do_cntp(s, val, a->esz, a->pg, a->pg);
3113 do_sat_addsub_32(reg, val, a->u, a->d);
3114 }
3115 return true;
3116 }
3117
3118 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3119 uint32_t insn)
3120 {
3121 if (sve_access_check(s)) {
3122 TCGv_i64 reg = cpu_reg(s, a->rd);
3123 TCGv_i64 val = tcg_temp_new_i64();
3124
3125 do_cntp(s, val, a->esz, a->pg, a->pg);
3126 do_sat_addsub_64(reg, val, a->u, a->d);
3127 }
3128 return true;
3129 }
3130
3131 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3132 uint32_t insn)
3133 {
3134 if (a->esz == 0) {
3135 return false;
3136 }
3137 if (sve_access_check(s)) {
3138 TCGv_i64 val = tcg_temp_new_i64();
3139 do_cntp(s, val, a->esz, a->pg, a->pg);
3140 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3141 }
3142 return true;
3143 }
3144
3145 /*
3146 *** SVE Integer Compare Scalars Group
3147 */
3148
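/* CTERMEQ/CTERMNE: N is set to the termination condition and
 * V computed as !N & !C; Z and C are left unchanged.
 */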
3149 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3150 {
3151 if (!sve_access_check(s)) {
3152 return true;
3153 }
3154
3155 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3156 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3157 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3158 TCGv_i64 cmp = tcg_temp_new_i64();
3159
3160 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3161 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3162 tcg_temp_free_i64(cmp);
3163
3164 /* VF = !NF & !CF. */
3165 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3166 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3167
3168 /* Both NF and VF actually look at bit 31. */
3169 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3170 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3171 return true;
3172 }
3173
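/* The WHILE instructions set a predicate covering the leading
 * iterations of an increment-and-compare loop.  We compute the number
 * of true iterations as a scalar, then have the helper expand that
 * count into a predicate and return the PREDTEST flags.
 */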
3174 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3175 {
3176 TCGv_i64 op0, op1, t0, t1, tmax;
3177 TCGv_i32 t2, t3;
3178 TCGv_ptr ptr;
3179 unsigned desc, vsz = vec_full_reg_size(s);
3180 TCGCond cond;
3181
3182 if (!sve_access_check(s)) {
3183 return true;
3184 }
3185
3186 op0 = read_cpu_reg(s, a->rn, 1);
3187 op1 = read_cpu_reg(s, a->rm, 1);
3188
3189 if (!a->sf) {
3190 if (a->u) {
3191 tcg_gen_ext32u_i64(op0, op0);
3192 tcg_gen_ext32u_i64(op1, op1);
3193 } else {
3194 tcg_gen_ext32s_i64(op0, op0);
3195 tcg_gen_ext32s_i64(op1, op1);
3196 }
3197 }
3198
3199 /* For the helper, compress the different conditions into a computation
3200 * of the number of iterations for which the condition is true.
3201 */
3202 t0 = tcg_temp_new_i64();
3203 t1 = tcg_temp_new_i64();
3204 tcg_gen_sub_i64(t0, op1, op0);
3205
3206 tmax = tcg_const_i64(vsz >> a->esz);
3207 if (a->eq) {
3208 /* Equality means one more iteration. */
3209 tcg_gen_addi_i64(t0, t0, 1);
3210
3211 /* If op1 is the maximum (un)signed integer (which is the only time
3212 * the addition above could overflow), then we produce an all-true
3213 * predicate by setting the count to the vector length. This is
3214 * because the pseudocode is described as an increment + compare
3215 * loop, and the max integer would always compare true.
3216 */
3217 tcg_gen_movi_i64(t1, (a->sf
3218 ? (a->u ? UINT64_MAX : INT64_MAX)
3219 : (a->u ? UINT32_MAX : INT32_MAX)));
3220 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3221 }
3222
3223 /* Bound to the maximum. */
3224 tcg_gen_umin_i64(t0, t0, tmax);
3225 tcg_temp_free_i64(tmax);
3226
3227 /* Set the count to zero if the condition is false. */
3228 cond = (a->u
3229 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3230 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3231 tcg_gen_movi_i64(t1, 0);
3232 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3233 tcg_temp_free_i64(t1);
3234
3235 /* Since we're bounded, pass as a 32-bit type. */
3236 t2 = tcg_temp_new_i32();
3237 tcg_gen_extrl_i64_i32(t2, t0);
3238 tcg_temp_free_i64(t0);
3239
3240 /* Scale elements to bits. */
3241 tcg_gen_shli_i32(t2, t2, a->esz);
3242
3243 desc = (vsz / 8) - 2;
3244 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3245 t3 = tcg_const_i32(desc);
3246
3247 ptr = tcg_temp_new_ptr();
3248 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3249
3250 gen_helper_sve_while(t2, ptr, t2, t3);
3251 do_pred_flags(t2);
3252
3253 tcg_temp_free_ptr(ptr);
3254 tcg_temp_free_i32(t2);
3255 tcg_temp_free_i32(t3);
3256 return true;
3257 }
3258
3259 /*
3260 *** SVE Integer Wide Immediate - Unpredicated Group
3261 */
3262
3263 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3264 {
3265 if (a->esz == 0) {
3266 return false;
3267 }
3268 if (sve_access_check(s)) {
3269 unsigned vsz = vec_full_reg_size(s);
3270 int dofs = vec_full_reg_offset(s, a->rd);
3271 uint64_t imm;
3272
3273 /* Decode the VFP immediate. */
3274 imm = vfp_expand_imm(a->esz, a->imm);
3275 imm = dup_const(a->esz, imm);
3276
3277 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3278 }
3279 return true;
3280 }
3281
3282 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3283 {
3284 if (a->esz == 0 && extract32(insn, 13, 1)) {
3285 return false;
3286 }
3287 if (sve_access_check(s)) {
3288 unsigned vsz = vec_full_reg_size(s);
3289 int dofs = vec_full_reg_offset(s, a->rd);
3290
3291 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3292 }
3293 return true;
3294 }
3295
3296 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3297 {
3298 if (a->esz == 0 && extract32(insn, 13, 1)) {
3299 return false;
3300 }
3301 if (sve_access_check(s)) {
3302 unsigned vsz = vec_full_reg_size(s);
3303 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3304 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3305 }
3306 return true;
3307 }
3308
3309 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3310 {
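/* Subtraction of an immediate is the addition of its negation,
 * so reuse the ADD expansion.
 */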
3311 a->imm = -a->imm;
3312 return trans_ADD_zzi(s, a, insn);
3313 }
3314
3315 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3316 {
3317 static const GVecGen2s op[4] = {
3318 { .fni8 = tcg_gen_vec_sub8_i64,
3319 .fniv = tcg_gen_sub_vec,
3320 .fno = gen_helper_sve_subri_b,
3321 .opc = INDEX_op_sub_vec,
3322 .vece = MO_8,
3323 .scalar_first = true },
3324 { .fni8 = tcg_gen_vec_sub16_i64,
3325 .fniv = tcg_gen_sub_vec,
3326 .fno = gen_helper_sve_subri_h,
3327 .opc = INDEX_op_sub_vec,
3328 .vece = MO_16,
3329 .scalar_first = true },
3330 { .fni4 = tcg_gen_sub_i32,
3331 .fniv = tcg_gen_sub_vec,
3332 .fno = gen_helper_sve_subri_s,
3333 .opc = INDEX_op_sub_vec,
3334 .vece = MO_32,
3335 .scalar_first = true },
3336 { .fni8 = tcg_gen_sub_i64,
3337 .fniv = tcg_gen_sub_vec,
3338 .fno = gen_helper_sve_subri_d,
3339 .opc = INDEX_op_sub_vec,
3340 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3341 .vece = MO_64,
3342 .scalar_first = true }
3343 };
3344
3345 if (a->esz == 0 && extract32(insn, 13, 1)) {
3346 return false;
3347 }
3348 if (sve_access_check(s)) {
3349 unsigned vsz = vec_full_reg_size(s);
3350 TCGv_i64 c = tcg_const_i64(a->imm);
3351 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3352 vec_full_reg_offset(s, a->rn),
3353 vsz, vsz, c, &op[a->esz]);
3354 tcg_temp_free_i64(c);
3355 }
3356 return true;
3357 }
3358
3359 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3360 {
3361 if (sve_access_check(s)) {
3362 unsigned vsz = vec_full_reg_size(s);
3363 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3364 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3365 }
3366 return true;
3367 }
3368
3369 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3370 bool u, bool d)
3371 {
3372 if (a->esz == 0 && extract32(insn, 13, 1)) {
3373 return false;
3374 }
3375 if (sve_access_check(s)) {
3376 TCGv_i64 val = tcg_const_i64(a->imm);
3377 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3378 tcg_temp_free_i64(val);
3379 }
3380 return true;
3381 }
3382
3383 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3384 {
3385 return do_zzi_sat(s, a, insn, false, false);
3386 }
3387
3388 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3389 {
3390 return do_zzi_sat(s, a, insn, true, false);
3391 }
3392
3393 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3394 {
3395 return do_zzi_sat(s, a, insn, false, true);
3396 }
3397
3398 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3399 {
3400 return do_zzi_sat(s, a, insn, true, true);
3401 }
3402
3403 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3404 {
3405 if (sve_access_check(s)) {
3406 unsigned vsz = vec_full_reg_size(s);
3407 TCGv_i64 c = tcg_const_i64(a->imm);
3408
3409 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3410 vec_full_reg_offset(s, a->rn),
3411 c, vsz, vsz, 0, fn);
3412 tcg_temp_free_i64(c);
3413 }
3414 return true;
3415 }
3416
3417 #define DO_ZZI(NAME, name) \
3418 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3419 uint32_t insn) \
3420 { \
3421 static gen_helper_gvec_2i * const fns[4] = { \
3422 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3423 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3424 }; \
3425 return do_zzi_ool(s, a, fns[a->esz]); \
3426 }
3427
3428 DO_ZZI(SMAX, smax)
3429 DO_ZZI(UMAX, umax)
3430 DO_ZZI(SMIN, smin)
3431 DO_ZZI(UMIN, umin)
3432
3433 #undef DO_ZZI
3434
3435 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3436 {
3437 static gen_helper_gvec_3 * const fns[2][2] = {
3438 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3439 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3440 };
3441
3442 if (sve_access_check(s)) {
3443 unsigned vsz = vec_full_reg_size(s);
3444 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3445 vec_full_reg_offset(s, a->rn),
3446 vec_full_reg_offset(s, a->rm),
3447 vsz, vsz, 0, fns[a->u][a->sz]);
3448 }
3449 return true;
3450 }
3451
3452 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3453 {
3454 static gen_helper_gvec_3 * const fns[2][2] = {
3455 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3456 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3457 };
3458
3459 if (sve_access_check(s)) {
3460 unsigned vsz = vec_full_reg_size(s);
3461 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3462 vec_full_reg_offset(s, a->rn),
3463 vec_full_reg_offset(s, a->rm),
3464 vsz, vsz, a->index, fns[a->u][a->sz]);
3465 }
3466 return true;
3467 }
3468
3469
3470 /*
3471 *** SVE Floating Point Multiply-Add Indexed Group
3472 */
3473
3474 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3475 {
3476 static gen_helper_gvec_4_ptr * const fns[3] = {
3477 gen_helper_gvec_fmla_idx_h,
3478 gen_helper_gvec_fmla_idx_s,
3479 gen_helper_gvec_fmla_idx_d,
3480 };
3481
3482 if (sve_access_check(s)) {
3483 unsigned vsz = vec_full_reg_size(s);
3484 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
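/* The data field packs the element index together with the SUB
 * flag distinguishing FMLS from FMLA.
 */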
3485 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3486 vec_full_reg_offset(s, a->rn),
3487 vec_full_reg_offset(s, a->rm),
3488 vec_full_reg_offset(s, a->ra),
3489 status, vsz, vsz, (a->index << 1) | a->sub,
3490 fns[a->esz - 1]);
3491 tcg_temp_free_ptr(status);
3492 }
3493 return true;
3494 }
3495
3496 /*
3497 *** SVE Floating Point Multiply Indexed Group
3498 */
3499
3500 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3501 {
3502 static gen_helper_gvec_3_ptr * const fns[3] = {
3503 gen_helper_gvec_fmul_idx_h,
3504 gen_helper_gvec_fmul_idx_s,
3505 gen_helper_gvec_fmul_idx_d,
3506 };
3507
3508 if (sve_access_check(s)) {
3509 unsigned vsz = vec_full_reg_size(s);
3510 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3511 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3512 vec_full_reg_offset(s, a->rn),
3513 vec_full_reg_offset(s, a->rm),
3514 status, vsz, vsz, a->index, fns[a->esz - 1]);
3515 tcg_temp_free_ptr(status);
3516 }
3517 return true;
3518 }
3519
3520 /*
3521 *** SVE Floating Point Fast Reduction Group
3522 */
3523
3524 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3525 TCGv_ptr, TCGv_i32);
3526
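/* The out-of-line reduction helpers presumably work over a
 * power-of-2 tree, which is why the descriptor carries pow2ceil(VL)
 * as the maxsz rather than VL itself.
 */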
3527 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3528 gen_helper_fp_reduce *fn)
3529 {
3530 unsigned vsz = vec_full_reg_size(s);
3531 unsigned p2vsz = pow2ceil(vsz);
3532 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3533 TCGv_ptr t_zn, t_pg, status;
3534 TCGv_i64 temp;
3535
3536 temp = tcg_temp_new_i64();
3537 t_zn = tcg_temp_new_ptr();
3538 t_pg = tcg_temp_new_ptr();
3539
3540 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3541 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3542 status = get_fpstatus_ptr(a->esz == MO_16);
3543
3544 fn(temp, t_zn, t_pg, status, t_desc);
3545 tcg_temp_free_ptr(t_zn);
3546 tcg_temp_free_ptr(t_pg);
3547 tcg_temp_free_ptr(status);
3548 tcg_temp_free_i32(t_desc);
3549
3550 write_fp_dreg(s, a->rd, temp);
3551 tcg_temp_free_i64(temp);
3552 }
3553
3554 #define DO_VPZ(NAME, name) \
3555 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3556 { \
3557 static gen_helper_fp_reduce * const fns[3] = { \
3558 gen_helper_sve_##name##_h, \
3559 gen_helper_sve_##name##_s, \
3560 gen_helper_sve_##name##_d, \
3561 }; \
3562 if (a->esz == 0) { \
3563 return false; \
3564 } \
3565 if (sve_access_check(s)) { \
3566 do_reduce(s, a, fns[a->esz - 1]); \
3567 } \
3568 return true; \
3569 }
3570
3571 DO_VPZ(FADDV, faddv)
3572 DO_VPZ(FMINNMV, fminnmv)
3573 DO_VPZ(FMAXNMV, fmaxnmv)
3574 DO_VPZ(FMINV, fminv)
3575 DO_VPZ(FMAXV, fmaxv)
3576
3577 /*
3578 *** SVE Floating Point Unary Operations - Unpredicated Group
3579 */
3580
3581 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3582 {
3583 unsigned vsz = vec_full_reg_size(s);
3584 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3585
3586 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3587 vec_full_reg_offset(s, a->rn),
3588 status, vsz, vsz, 0, fn);
3589 tcg_temp_free_ptr(status);
3590 }
3591
3592 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3593 {
3594 static gen_helper_gvec_2_ptr * const fns[3] = {
3595 gen_helper_gvec_frecpe_h,
3596 gen_helper_gvec_frecpe_s,
3597 gen_helper_gvec_frecpe_d,
3598 };
3599 if (a->esz == 0) {
3600 return false;
3601 }
3602 if (sve_access_check(s)) {
3603 do_zz_fp(s, a, fns[a->esz - 1]);
3604 }
3605 return true;
3606 }
3607
3608 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3609 {
3610 static gen_helper_gvec_2_ptr * const fns[3] = {
3611 gen_helper_gvec_frsqrte_h,
3612 gen_helper_gvec_frsqrte_s,
3613 gen_helper_gvec_frsqrte_d,
3614 };
3615 if (a->esz == 0) {
3616 return false;
3617 }
3618 if (sve_access_check(s)) {
3619 do_zz_fp(s, a, fns[a->esz - 1]);
3620 }
3621 return true;
3622 }
3623
3624 /*
3625 *** SVE Floating Point Compare with Zero Group
3626 */
3627
3628 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3629 gen_helper_gvec_3_ptr *fn)
3630 {
3631 unsigned vsz = vec_full_reg_size(s);
3632 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3633
3634 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3635 vec_full_reg_offset(s, a->rn),
3636 pred_full_reg_offset(s, a->pg),
3637 status, vsz, vsz, 0, fn);
3638 tcg_temp_free_ptr(status);
3639 }
3640
3641 #define DO_PPZ(NAME, name) \
3642 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3643 { \
3644 static gen_helper_gvec_3_ptr * const fns[3] = { \
3645 gen_helper_sve_##name##_h, \
3646 gen_helper_sve_##name##_s, \
3647 gen_helper_sve_##name##_d, \
3648 }; \
3649 if (a->esz == 0) { \
3650 return false; \
3651 } \
3652 if (sve_access_check(s)) { \
3653 do_ppz_fp(s, a, fns[a->esz - 1]); \
3654 } \
3655 return true; \
3656 }
3657
3658 DO_PPZ(FCMGE_ppz0, fcmge0)
3659 DO_PPZ(FCMGT_ppz0, fcmgt0)
3660 DO_PPZ(FCMLE_ppz0, fcmle0)
3661 DO_PPZ(FCMLT_ppz0, fcmlt0)
3662 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3663 DO_PPZ(FCMNE_ppz0, fcmne0)
3664
3665 #undef DO_PPZ
3666
3667 /*
3668 *** SVE floating-point trig multiply-add coefficient
3669 */
3670
3671 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3672 {
3673 static gen_helper_gvec_3_ptr * const fns[3] = {
3674 gen_helper_sve_ftmad_h,
3675 gen_helper_sve_ftmad_s,
3676 gen_helper_sve_ftmad_d,
3677 };
3678
3679 if (a->esz == 0) {
3680 return false;
3681 }
3682 if (sve_access_check(s)) {
3683 unsigned vsz = vec_full_reg_size(s);
3684 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3685 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3686 vec_full_reg_offset(s, a->rn),
3687 vec_full_reg_offset(s, a->rm),
3688 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3689 tcg_temp_free_ptr(status);
3690 }
3691 return true;
3692 }
3693
3694 /*
3695 *** SVE Floating Point Accumulating Reduction Group
3696 */
3697
3698 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3699 {
3700 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3701 TCGv_ptr, TCGv_ptr, TCGv_i32);
3702 static fadda_fn * const fns[3] = {
3703 gen_helper_sve_fadda_h,
3704 gen_helper_sve_fadda_s,
3705 gen_helper_sve_fadda_d,
3706 };
3707 unsigned vsz = vec_full_reg_size(s);
3708 TCGv_ptr t_rm, t_pg, t_fpst;
3709 TCGv_i64 t_val;
3710 TCGv_i32 t_desc;
3711
3712 if (a->esz == 0) {
3713 return false;
3714 }
3715 if (!sve_access_check(s)) {
3716 return true;
3717 }
3718
3719 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3720 t_rm = tcg_temp_new_ptr();
3721 t_pg = tcg_temp_new_ptr();
3722 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3723 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3724 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3725 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3726
3727 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3728
3729 tcg_temp_free_i32(t_desc);
3730 tcg_temp_free_ptr(t_fpst);
3731 tcg_temp_free_ptr(t_pg);
3732 tcg_temp_free_ptr(t_rm);
3733
3734 write_fp_dreg(s, a->rd, t_val);
3735 tcg_temp_free_i64(t_val);
3736 return true;
3737 }
3738
3739 /*
3740 *** SVE Floating Point Arithmetic - Unpredicated Group
3741 */
3742
3743 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3744 gen_helper_gvec_3_ptr *fn)
3745 {
3746 if (fn == NULL) {
3747 return false;
3748 }
3749 if (sve_access_check(s)) {
3750 unsigned vsz = vec_full_reg_size(s);
3751 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3752 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3753 vec_full_reg_offset(s, a->rn),
3754 vec_full_reg_offset(s, a->rm),
3755 status, vsz, vsz, 0, fn);
3756 tcg_temp_free_ptr(status);
3757 }
3758 return true;
3759 }
3760
3761
3762 #define DO_FP3(NAME, name) \
3763 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3764 { \
3765 static gen_helper_gvec_3_ptr * const fns[4] = { \
3766 NULL, gen_helper_gvec_##name##_h, \
3767 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3768 }; \
3769 return do_zzz_fp(s, a, fns[a->esz]); \
3770 }
3771
3772 DO_FP3(FADD_zzz, fadd)
3773 DO_FP3(FSUB_zzz, fsub)
3774 DO_FP3(FMUL_zzz, fmul)
3775 DO_FP3(FTSMUL, ftsmul)
3776 DO_FP3(FRECPS, recps)
3777 DO_FP3(FRSQRTS, rsqrts)
3778
3779 #undef DO_FP3
3780
3781 /*
3782 *** SVE Floating Point Arithmetic - Predicated Group
3783 */
3784
3785 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3786 gen_helper_gvec_4_ptr *fn)
3787 {
3788 if (fn == NULL) {
3789 return false;
3790 }
3791 if (sve_access_check(s)) {
3792 unsigned vsz = vec_full_reg_size(s);
3793 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3794 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3795 vec_full_reg_offset(s, a->rn),
3796 vec_full_reg_offset(s, a->rm),
3797 pred_full_reg_offset(s, a->pg),
3798 status, vsz, vsz, 0, fn);
3799 tcg_temp_free_ptr(status);
3800 }
3801 return true;
3802 }
3803
3804 #define DO_FP3(NAME, name) \
3805 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3806 { \
3807 static gen_helper_gvec_4_ptr * const fns[4] = { \
3808 NULL, gen_helper_sve_##name##_h, \
3809 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3810 }; \
3811 return do_zpzz_fp(s, a, fns[a->esz]); \
3812 }
3813
3814 DO_FP3(FADD_zpzz, fadd)
3815 DO_FP3(FSUB_zpzz, fsub)
3816 DO_FP3(FMUL_zpzz, fmul)
3817 DO_FP3(FMIN_zpzz, fmin)
3818 DO_FP3(FMAX_zpzz, fmax)
3819 DO_FP3(FMINNM_zpzz, fminnum)
3820 DO_FP3(FMAXNM_zpzz, fmaxnum)
3821 DO_FP3(FABD, fabd)
3822 DO_FP3(FSCALE, fscalbn)
3823 DO_FP3(FDIV, fdiv)
3824 DO_FP3(FMULX, fmulx)
3825
3826 #undef DO_FP3
3827
3828 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3829 TCGv_i64, TCGv_ptr, TCGv_i32);
3830
3831 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3832 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3833 {
3834 unsigned vsz = vec_full_reg_size(s);
3835 TCGv_ptr t_zd, t_zn, t_pg, status;
3836 TCGv_i32 desc;
3837
3838 t_zd = tcg_temp_new_ptr();
3839 t_zn = tcg_temp_new_ptr();
3840 t_pg = tcg_temp_new_ptr();
3841 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3842 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3843 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3844
3845 status = get_fpstatus_ptr(is_fp16);
3846 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3847 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3848
3849 tcg_temp_free_i32(desc);
3850 tcg_temp_free_ptr(status);
3851 tcg_temp_free_ptr(t_pg);
3852 tcg_temp_free_ptr(t_zn);
3853 tcg_temp_free_ptr(t_zd);
3854 }
3855
3856 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3857 gen_helper_sve_fp2scalar *fn)
3858 {
3859 TCGv_i64 temp = tcg_const_i64(imm);
3860 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3861 tcg_temp_free_i64(temp);
3862 }
3863
3864 #define DO_FP_IMM(NAME, name, const0, const1) \
3865 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
3866 uint32_t insn) \
3867 { \
3868 static gen_helper_sve_fp2scalar * const fns[3] = { \
3869 gen_helper_sve_##name##_h, \
3870 gen_helper_sve_##name##_s, \
3871 gen_helper_sve_##name##_d \
3872 }; \
3873 static uint64_t const val[3][2] = { \
3874 { float16_##const0, float16_##const1 }, \
3875 { float32_##const0, float32_##const1 }, \
3876 { float64_##const0, float64_##const1 }, \
3877 }; \
3878 if (a->esz == 0) { \
3879 return false; \
3880 } \
3881 if (sve_access_check(s)) { \
3882 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3883 } \
3884 return true; \
3885 }
3886
3887 #define float16_two make_float16(0x4000)
3888 #define float32_two make_float32(0x40000000)
3889 #define float64_two make_float64(0x4000000000000000ULL)
3890
3891 DO_FP_IMM(FADD, fadds, half, one)
3892 DO_FP_IMM(FSUB, fsubs, half, one)
3893 DO_FP_IMM(FMUL, fmuls, half, two)
3894 DO_FP_IMM(FSUBR, fsubrs, half, one)
3895 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3896 DO_FP_IMM(FMINNM, fminnms, zero, one)
3897 DO_FP_IMM(FMAX, fmaxs, zero, one)
3898 DO_FP_IMM(FMIN, fmins, zero, one)
3899
3900 #undef DO_FP_IMM
3901
3902 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3903 gen_helper_gvec_4_ptr *fn)
3904 {
3905 if (fn == NULL) {
3906 return false;
3907 }
3908 if (sve_access_check(s)) {
3909 unsigned vsz = vec_full_reg_size(s);
3910 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3911 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3912 vec_full_reg_offset(s, a->rn),
3913 vec_full_reg_offset(s, a->rm),
3914 pred_full_reg_offset(s, a->pg),
3915 status, vsz, vsz, 0, fn);
3916 tcg_temp_free_ptr(status);
3917 }
3918 return true;
3919 }
3920
3921 #define DO_FPCMP(NAME, name) \
3922 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
3923 uint32_t insn) \
3924 { \
3925 static gen_helper_gvec_4_ptr * const fns[4] = { \
3926 NULL, gen_helper_sve_##name##_h, \
3927 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3928 }; \
3929 return do_fp_cmp(s, a, fns[a->esz]); \
3930 }
3931
3932 DO_FPCMP(FCMGE, fcmge)
3933 DO_FPCMP(FCMGT, fcmgt)
3934 DO_FPCMP(FCMEQ, fcmeq)
3935 DO_FPCMP(FCMNE, fcmne)
3936 DO_FPCMP(FCMUO, fcmuo)
3937 DO_FPCMP(FACGE, facge)
3938 DO_FPCMP(FACGT, facgt)
3939
3940 #undef DO_FPCMP
3941
3942 static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3943 {
3944 static gen_helper_gvec_4_ptr * const fns[3] = {
3945 gen_helper_sve_fcadd_h,
3946 gen_helper_sve_fcadd_s,
3947 gen_helper_sve_fcadd_d
3948 };
3949
3950 if (a->esz == 0) {
3951 return false;
3952 }
3953 if (sve_access_check(s)) {
3954 unsigned vsz = vec_full_reg_size(s);
3955 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3956 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3957 vec_full_reg_offset(s, a->rn),
3958 vec_full_reg_offset(s, a->rm),
3959 pred_full_reg_offset(s, a->pg),
3960 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3961 tcg_temp_free_ptr(status);
3962 }
3963 return true;
3964 }
3965
3966 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3967
3968 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3969 {
3970 if (fn == NULL) {
3971 return false;
3972 }
3973 if (!sve_access_check(s)) {
3974 return true;
3975 }
3976
3977 unsigned vsz = vec_full_reg_size(s);
3978 unsigned desc;
3979 TCGv_i32 t_desc;
3980 TCGv_ptr pg = tcg_temp_new_ptr();
3981
3982 /* We would need 7 operands to pass these arguments "properly".
3983 * So we encode all the register numbers into the descriptor.
3984 */
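/* Layout: bits [4:0] = rd, [9:5] = rn, [14:10] = rm, [19:15] = ra,
 * all within the data field of simd_desc.
 */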
3985 desc = deposit32(a->rd, 5, 5, a->rn);
3986 desc = deposit32(desc, 10, 5, a->rm);
3987 desc = deposit32(desc, 15, 5, a->ra);
3988 desc = simd_desc(vsz, vsz, desc);
3989
3990 t_desc = tcg_const_i32(desc);
3991 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3992 fn(cpu_env, pg, t_desc);
3993 tcg_temp_free_i32(t_desc);
3994 tcg_temp_free_ptr(pg);
3995 return true;
3996 }
3997
3998 #define DO_FMLA(NAME, name) \
3999 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
4000 { \
4001 static gen_helper_sve_fmla * const fns[4] = { \
4002 NULL, gen_helper_sve_##name##_h, \
4003 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4004 }; \
4005 return do_fmla(s, a, fns[a->esz]); \
4006 }
4007
4008 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4009 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4010 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4011 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4012
4013 #undef DO_FMLA
4014
4015 static bool trans_FCMLA_zpzzz(DisasContext *s,
4016 arg_FCMLA_zpzzz *a, uint32_t insn)
4017 {
4018 static gen_helper_sve_fmla * const fns[3] = {
4019 gen_helper_sve_fcmla_zpzzz_h,
4020 gen_helper_sve_fcmla_zpzzz_s,
4021 gen_helper_sve_fcmla_zpzzz_d,
4022 };
4023
4024 if (a->esz == 0) {
4025 return false;
4026 }
4027 if (sve_access_check(s)) {
4028 unsigned vsz = vec_full_reg_size(s);
4029 unsigned desc;
4030 TCGv_i32 t_desc;
4031 TCGv_ptr pg = tcg_temp_new_ptr();
4032
4033 /* We would need 7 operands to pass these arguments "properly".
4034 * So we encode all the register numbers into the descriptor.
4035 */
4036 desc = deposit32(a->rd, 5, 5, a->rn);
4037 desc = deposit32(desc, 10, 5, a->rm);
4038 desc = deposit32(desc, 15, 5, a->ra);
4039 desc = deposit32(desc, 20, 2, a->rot);
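/* simd_desc() asserts that its data argument fits in a signed 22-bit
* field; sign-extend here so the packed value round-trips through
* simd_data() unchanged.
*/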
4040 desc = sextract32(desc, 0, 22);
4041 desc = simd_desc(vsz, vsz, desc);
4042
4043 t_desc = tcg_const_i32(desc);
4044 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4045 fns[a->esz - 1](cpu_env, pg, t_desc);
4046 tcg_temp_free_i32(t_desc);
4047 tcg_temp_free_ptr(pg);
4048 }
4049 return true;
4050 }
4051
4052 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4053 {
4054 static gen_helper_gvec_3_ptr * const fns[2] = {
4055 gen_helper_gvec_fcmlah_idx,
4056 gen_helper_gvec_fcmlas_idx,
4057 };
4058
4059 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4060 tcg_debug_assert(a->rd == a->ra);
4061 if (sve_access_check(s)) {
4062 unsigned vsz = vec_full_reg_size(s);
4063 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4064 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4065 vec_full_reg_offset(s, a->rn),
4066 vec_full_reg_offset(s, a->rm),
4067 status, vsz, vsz,
4068 a->index * 4 + a->rot,
4069 fns[a->esz - 1]);
4070 tcg_temp_free_ptr(status);
4071 }
4072 return true;
4073 }
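/* Here a->index and a->rot share the data field as index * 4 + rot;
* since rot occupies two bits, the two can be split back out as
* (data & 3) and (data >> 2).
*/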
4074
4075 /*
4076 *** SVE Floating Point Unary Operations Predicated Group
4077 */
4078
4079 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4080 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4081 {
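/* is_fp16 selects the half-precision softfloat status, so that FZ16
* rather than FZ controls flush-to-zero for 16-bit operations.
*/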
4082 if (sve_access_check(s)) {
4083 unsigned vsz = vec_full_reg_size(s);
4084 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4085 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4086 vec_full_reg_offset(s, rn),
4087 pred_full_reg_offset(s, pg),
4088 status, vsz, vsz, 0, fn);
4089 tcg_temp_free_ptr(status);
4090 }
4091 return true;
4092 }
4093
4094 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4095 {
4096 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
4097 }
4098
4099 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4100 {
4101 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4102 }
4103
4104 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4105 {
4106 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
4107 }
4108
4109 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4110 {
4111 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4112 }
4113
4114 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4115 {
4116 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4117 }
4118
4119 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4120 {
4121 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4122 }
4123
4124 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4125 {
4126 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4127 }
4128
4129 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4130 {
4131 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4132 }
4133
4134 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4135 {
4136 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4137 }
4138
4139 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4140 {
4141 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4142 }
4143
4144 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4145 {
4146 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4147 }
4148
4149 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4150 {
4151 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4152 }
4153
4154 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4155 {
4156 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4157 }
4158
4159 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4160 {
4161 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4162 }
4163
4164 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4165 {
4166 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4167 }
4168
4169 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4170 {
4171 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4172 }
4173
4174 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4175 {
4176 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4177 }
4178
4179 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4180 {
4181 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4182 }
4183
4184 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4185 {
4186 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4187 }
4188
4189 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4190 {
4191 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4192 }
4193
4194 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4195 gen_helper_sve_frint_h,
4196 gen_helper_sve_frint_s,
4197 gen_helper_sve_frint_d
4198 };
4199
4200 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4201 {
4202 if (a->esz == 0) {
4203 return false;
4204 }
4205 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4206 frint_fns[a->esz - 1]);
4207 }
4208
4209 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4210 {
4211 static gen_helper_gvec_3_ptr * const fns[3] = {
4212 gen_helper_sve_frintx_h,
4213 gen_helper_sve_frintx_s,
4214 gen_helper_sve_frintx_d
4215 };
4216 if (a->esz == 0) {
4217 return false;
4218 }
4219 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4220 }
4221
4222 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4223 {
4224 if (a->esz == 0) {
4225 return false;
4226 }
4227 if (sve_access_check(s)) {
4228 unsigned vsz = vec_full_reg_size(s);
4229 TCGv_i32 tmode = tcg_const_i32(mode);
4230 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4231
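/* gen_helper_set_rmode installs the new rounding mode and returns the
* previous mode in its destination; the second call below therefore
* restores the original mode.
*/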
4232 gen_helper_set_rmode(tmode, tmode, status);
4233
4234 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4235 vec_full_reg_offset(s, a->rn),
4236 pred_full_reg_offset(s, a->pg),
4237 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4238
4239 gen_helper_set_rmode(tmode, tmode, status);
4240 tcg_temp_free_i32(tmode);
4241 tcg_temp_free_ptr(status);
4242 }
4243 return true;
4244 }
4245
4246 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4247 {
4248 return do_frint_mode(s, a, float_round_nearest_even);
4249 }
4250
4251 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4252 {
4253 return do_frint_mode(s, a, float_round_up);
4254 }
4255
4256 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4257 {
4258 return do_frint_mode(s, a, float_round_down);
4259 }
4260
4261 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4262 {
4263 return do_frint_mode(s, a, float_round_to_zero);
4264 }
4265
4266 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4267 {
4268 return do_frint_mode(s, a, float_round_ties_away);
4269 }
4270
4271 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4272 {
4273 static gen_helper_gvec_3_ptr * const fns[3] = {
4274 gen_helper_sve_frecpx_h,
4275 gen_helper_sve_frecpx_s,
4276 gen_helper_sve_frecpx_d
4277 };
4278 if (a->esz == 0) {
4279 return false;
4280 }
4281 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4282 }
4283
4284 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4285 {
4286 static gen_helper_gvec_3_ptr * const fns[3] = {
4287 gen_helper_sve_fsqrt_h,
4288 gen_helper_sve_fsqrt_s,
4289 gen_helper_sve_fsqrt_d
4290 };
4291 if (a->esz == 0) {
4292 return false;
4293 }
4294 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4295 }
4296
4297 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4298 {
4299 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4300 }
4301
4302 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4303 {
4304 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4305 }
4306
4307 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4308 {
4309 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4310 }
4311
4312 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4313 {
4314 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4315 }
4316
4317 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4318 {
4319 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4320 }
4321
4322 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4323 {
4324 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4325 }
4326
4327 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4328 {
4329 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4330 }
4331
4332 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4333 {
4334 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4335 }
4336
4337 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4338 {
4339 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4340 }
4341
4342 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4343 {
4344 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4345 }
4346
4347 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4348 {
4349 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4350 }
4351
4352 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4353 {
4354 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4355 }
4356
4357 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4358 {
4359 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4360 }
4361
4362 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4363 {
4364 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4365 }
4366
4367 /*
4368 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4369 */
4370
4371 /* Subroutine loading a vector register at VOFS of LEN bytes.
4372 * The load should begin at the address Rn + IMM.
4373 */
4374
4375 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4376 {
4377 int len_align = QEMU_ALIGN_DOWN(len, 8);
4378 int len_remain = len % 8;
4379 int nparts = len / 8 + ctpop8(len_remain);
4380 int midx = get_mem_index(s);
4381 TCGv_i64 addr, t0, t1;
4382
4383 addr = tcg_temp_new_i64();
4384 t0 = tcg_temp_new_i64();
4385
4386 /* Note that an unpredicated load/store of vector/predicate registers
4387 * is defined as a stream of bytes, which equates to little-endian
4388 * operations on larger quantities. There is no nice way to force
4389 * a little-endian load for aarch64_be-linux-user out of line.
4390 *
4391 * Attempt to keep code expansion to a minimum by limiting the
4392 * amount of unrolling done.
4393 */
4394 if (nparts <= 4) {
4395 int i;
4396
4397 for (i = 0; i < len_align; i += 8) {
4398 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4399 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4400 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4401 }
4402 } else {
4403 TCGLabel *loop = gen_new_label();
4404 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4405
4406 gen_set_label(loop);
4407
4408 /* Minimize the number of local temps that must be re-read from
4409 * the stack each iteration. Instead, re-compute values other
4410 * than the loop counter.
4411 */
4412 tp = tcg_temp_new_ptr();
4413 tcg_gen_addi_ptr(tp, i, imm);
4414 tcg_gen_extu_ptr_i64(addr, tp);
4415 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4416
4417 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4418
4419 tcg_gen_add_ptr(tp, cpu_env, i);
4420 tcg_gen_addi_ptr(i, i, 8);
4421 tcg_gen_st_i64(t0, tp, vofs);
4422 tcg_temp_free_ptr(tp);
4423
4424 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4425 tcg_temp_free_ptr(i);
4426 }
4427
4428 /* Predicate register loads can be any multiple of 2.
4429 * Note that we still store the entire 64-bit unit into cpu_env.
4430 */
4431 if (len_remain) {
4432 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4433
4434 switch (len_remain) {
4435 case 2:
4436 case 4:
4437 case 8:
4438 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4439 break;
4440
4441 case 6:
4442 t1 = tcg_temp_new_i64();
4443 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4444 tcg_gen_addi_i64(addr, addr, 4);
4445 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4446 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4447 tcg_temp_free_i64(t1);
4448 break;
4449
4450 default:
4451 g_assert_not_reached();
4452 }
4453 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4454 }
4455 tcg_temp_free_i64(addr);
4456 tcg_temp_free_i64(t0);
4457 }
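/* As a worked example, with a 256-bit vector length a full z-register
* load has len == 32: len_align == 32, len_remain == 0 and nparts == 4,
* so the unrolled path above is taken. The corresponding predicate
* load has len == 4: len_align == 0 and len_remain == 4, so only the
* single 32-bit tail load executes.
*/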
4458
4459 /* Similarly for stores. */
4460 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4461 {
4462 int len_align = QEMU_ALIGN_DOWN(len, 8);
4463 int len_remain = len % 8;
4464 int nparts = len / 8 + ctpop8(len_remain);
4465 int midx = get_mem_index(s);
4466 TCGv_i64 addr, t0;
4467
4468 addr = tcg_temp_new_i64();
4469 t0 = tcg_temp_new_i64();
4470
4471 /* Note that an unpredicated load/store of vector/predicate registers
4472 * is defined as a stream of bytes, which equates to little-endian
4473 * operations on larger quantities. There is no nice way to force
4474 * a little-endian store for aarch64_be-linux-user out of line.
4475 *
4476 * Attempt to keep code expansion to a minimum by limiting the
4477 * amount of unrolling done.
4478 */
4479 if (nparts <= 4) {
4480 int i;
4481
4482 for (i = 0; i < len_align; i += 8) {
4483 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4484 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4485 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4486 }
4487 } else {
4488 TCGLabel *loop = gen_new_label();
4489 TCGv_ptr t2, i = tcg_const_local_ptr(0);
4490
4491 gen_set_label(loop);
4492
4493 t2 = tcg_temp_new_ptr();
4494 tcg_gen_add_ptr(t2, cpu_env, i);
4495 tcg_gen_ld_i64(t0, t2, vofs);
4496
4497 /* Minimize the number of local temps that must be re-read from
4498 * the stack each iteration. Instead, re-compute values other
4499 * than the loop counter.
4500 */
4501 tcg_gen_addi_ptr(t2, i, imm);
4502 tcg_gen_extu_ptr_i64(addr, t2);
4503 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4504 tcg_temp_free_ptr(t2);
4505
4506 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4507
4508 tcg_gen_addi_ptr(i, i, 8);
4509
4510 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4511 tcg_temp_free_ptr(i);
4512 }
4513
4514 /* Predicate register stores can be any multiple of 2. */
4515 if (len_remain) {
4516 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4517 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4518
4519 switch (len_remain) {
4520 case 2:
4521 case 4:
4522 case 8:
4523 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4524 break;
4525
4526 case 6:
4527 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4528 tcg_gen_addi_i64(addr, addr, 4);
4529 tcg_gen_shri_i64(t0, t0, 32);
4530 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4531 break;
4532
4533 default:
4534 g_assert_not_reached();
4535 }
4536 }
4537 tcg_temp_free_i64(addr);
4538 tcg_temp_free_i64(t0);
4539 }
4540
4541 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4542 {
4543 if (sve_access_check(s)) {
4544 int size = vec_full_reg_size(s);
4545 int off = vec_full_reg_offset(s, a->rd);
4546 do_ldr(s, off, size, a->rn, a->imm * size);
4547 }
4548 return true;
4549 }
4550
4551 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4552 {
4553 if (sve_access_check(s)) {
4554 int size = pred_full_reg_size(s);
4555 int off = pred_full_reg_offset(s, a->rd);
4556 do_ldr(s, off, size, a->rn, a->imm * size);
4557 }
4558 return true;
4559 }
4560
4561 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4562 {
4563 if (sve_access_check(s)) {
4564 int size = vec_full_reg_size(s);
4565 int off = vec_full_reg_offset(s, a->rd);
4566 do_str(s, off, size, a->rn, a->imm * size);
4567 }
4568 return true;
4569 }
4570
4571 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4572 {
4573 if (sve_access_check(s)) {
4574 int size = pred_full_reg_size(s);
4575 int off = pred_full_reg_offset(s, a->rd);
4576 do_str(s, off, size, a->rn, a->imm * size);
4577 }
4578 return true;
4579 }
4580
4581 /*
4582 *** SVE Memory - Contiguous Load Group
4583 */
4584
4585 /* The memory mode of the dtype. */
4586 static const TCGMemOp dtype_mop[16] = {
4587 MO_UB, MO_UB, MO_UB, MO_UB,
4588 MO_SL, MO_UW, MO_UW, MO_UW,
4589 MO_SW, MO_SW, MO_UL, MO_UL,
4590 MO_SB, MO_SB, MO_SB, MO_Q
4591 };
4592
4593 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4594
4595 /* The vector element size of dtype. */
4596 static const uint8_t dtype_esz[16] = {
4597 0, 1, 2, 3,
4598 3, 1, 2, 3,
4599 3, 2, 2, 3,
4600 3, 2, 1, 3
4601 };
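/* E.g. dtype 3 is LD1B into a .D vector: dtype_mop[3] == MO_UB and
* dtype_esz[3] == 3, one byte zero-extended into each 64-bit element;
* dtype 4 pairs MO_SL with esz 3 for the sign-extending LD1SW form.
*/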
4602
4603 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4604 gen_helper_gvec_mem *fn)
4605 {
4606 unsigned vsz = vec_full_reg_size(s);
4607 TCGv_ptr t_pg;
4608 TCGv_i32 desc;
4609
4610 /* For a multi-register load such as LD4, there are not enough arguments to pass all 4
4611 * registers as pointers, so encode the regno into the data field.
4612 * For consistency, do this even for LD1.
4613 */
4614 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4615 t_pg = tcg_temp_new_ptr();
4616
4617 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4618 fn(cpu_env, t_pg, addr, desc);
4619
4620 tcg_temp_free_ptr(t_pg);
4621 tcg_temp_free_i32(desc);
4622 }
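/* On the helper side the register number comes back out via
* simd_data(desc), which indexes env->vfp.zregs[].
*/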
4623
4624 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4625 TCGv_i64 addr, int dtype, int nreg)
4626 {
4627 static gen_helper_gvec_mem * const fns[16][4] = {
4628 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4629 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4630 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4631 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4632 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4633
4634 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4635 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4636 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4637 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4638 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4639
4640 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4641 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4642 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4643 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4644 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4645
4646 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4647 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4648 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4649 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4650 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4651 };
4652 gen_helper_gvec_mem *fn = fns[dtype][nreg];
4653
4654 /* While there are holes in the table, they are not
4655 * accessible via the instruction encoding.
4656 */
4657 assert(fn != NULL);
4658 do_mem_zpa(s, zt, pg, addr, fn);
4659 }
4660
4661 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4662 {
4663 if (a->rm == 31) {
4664 return false;
4665 }
4666 if (sve_access_check(s)) {
4667 TCGv_i64 addr = new_tmp_a64(s);
4668 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4669 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4670 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4671 }
4672 return true;
4673 }
4674
4675 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4676 {
4677 if (sve_access_check(s)) {
4678 int vsz = vec_full_reg_size(s);
4679 int elements = vsz >> dtype_esz[a->dtype];
4680 TCGv_i64 addr = new_tmp_a64(s);
4681
4682 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4683 (a->imm * elements * (a->nreg + 1))
4684 << dtype_msz(a->dtype));
4685 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4686 }
4687 return true;
4688 }
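/* E.g. LD1H { z0.h }, p0/z, [x1, #2, MUL VL] with a 256-bit vector:
* elements == 32 >> 1 == 16, so the offset is (2 * 16 * 1) << 1 == 64
* bytes, i.e. exactly two vectors' worth of halfwords.
*/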
4689
4690 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4691 {
4692 static gen_helper_gvec_mem * const fns[16] = {
4693 gen_helper_sve_ldff1bb_r,
4694 gen_helper_sve_ldff1bhu_r,
4695 gen_helper_sve_ldff1bsu_r,
4696 gen_helper_sve_ldff1bdu_r,
4697
4698 gen_helper_sve_ldff1sds_r,
4699 gen_helper_sve_ldff1hh_r,
4700 gen_helper_sve_ldff1hsu_r,
4701 gen_helper_sve_ldff1hdu_r,
4702
4703 gen_helper_sve_ldff1hds_r,
4704 gen_helper_sve_ldff1hss_r,
4705 gen_helper_sve_ldff1ss_r,
4706 gen_helper_sve_ldff1sdu_r,
4707
4708 gen_helper_sve_ldff1bds_r,
4709 gen_helper_sve_ldff1bss_r,
4710 gen_helper_sve_ldff1bhs_r,
4711 gen_helper_sve_ldff1dd_r,
4712 };
4713
4714 if (sve_access_check(s)) {
4715 TCGv_i64 addr = new_tmp_a64(s);
4716 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4717 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4718 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4719 }
4720 return true;
4721 }
4722
4723 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4724 {
4725 static gen_helper_gvec_mem * const fns[16] = {
4726 gen_helper_sve_ldnf1bb_r,
4727 gen_helper_sve_ldnf1bhu_r,
4728 gen_helper_sve_ldnf1bsu_r,
4729 gen_helper_sve_ldnf1bdu_r,
4730
4731 gen_helper_sve_ldnf1sds_r,
4732 gen_helper_sve_ldnf1hh_r,
4733 gen_helper_sve_ldnf1hsu_r,
4734 gen_helper_sve_ldnf1hdu_r,
4735
4736 gen_helper_sve_ldnf1hds_r,
4737 gen_helper_sve_ldnf1hss_r,
4738 gen_helper_sve_ldnf1ss_r,
4739 gen_helper_sve_ldnf1sdu_r,
4740
4741 gen_helper_sve_ldnf1bds_r,
4742 gen_helper_sve_ldnf1bss_r,
4743 gen_helper_sve_ldnf1bhs_r,
4744 gen_helper_sve_ldnf1dd_r,
4745 };
4746
4747 if (sve_access_check(s)) {
4748 int vsz = vec_full_reg_size(s);
4749 int elements = vsz >> dtype_esz[a->dtype];
4750 int off = (a->imm * elements) << dtype_msz(a->dtype);
4751 TCGv_i64 addr = new_tmp_a64(s);
4752
4753 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4754 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4755 }
4756 return true;
4757 }
4758
4759 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4760 {
4761 static gen_helper_gvec_mem * const fns[4] = {
4762 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4763 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4764 };
4765 unsigned vsz = vec_full_reg_size(s);
4766 TCGv_ptr t_pg;
4767 TCGv_i32 desc;
4768
4769 /* Load the first quadword using the normal predicated load helpers. */
4770 desc = tcg_const_i32(simd_desc(16, 16, zt));
4771 t_pg = tcg_temp_new_ptr();
4772
4773 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4774 fns[msz](cpu_env, t_pg, addr, desc);
4775
4776 tcg_temp_free_ptr(t_pg);
4777 tcg_temp_free_i32(desc);
4778
4779 /* Replicate that first quadword. */
4780 if (vsz > 16) {
4781 unsigned dofs = vec_full_reg_offset(s, zt);
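/* vece == 4 requests 2**4 == 16-byte units, so the quadword just
* loaded at dofs is copied across the remaining vsz - 16 bytes.
*/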
4782 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4783 }
4784 }
4785
4786 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4787 {
4788 if (a->rm == 31) {
4789 return false;
4790 }
4791 if (sve_access_check(s)) {
4792 int msz = dtype_msz(a->dtype);
4793 TCGv_i64 addr = new_tmp_a64(s);
4794 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4795 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4796 do_ldrq(s, a->rd, a->pg, addr, msz);
4797 }
4798 return true;
4799 }
4800
4801 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4802 {
4803 if (sve_access_check(s)) {
4804 TCGv_i64 addr = new_tmp_a64(s);
4805 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4806 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4807 }
4808 return true;
4809 }
4810
4811 /* Load and broadcast element. */
4812 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4813 {
4814 if (!sve_access_check(s)) {
4815 return true;
4816 }
4817
4818 unsigned vsz = vec_full_reg_size(s);
4819 unsigned psz = pred_full_reg_size(s);
4820 unsigned esz = dtype_esz[a->dtype];
4821 unsigned msz = dtype_msz(a->dtype);
4822 TCGLabel *over = gen_new_label();
4823 TCGv_i64 temp;
4824
4825 /* If the guarding predicate has no bits set, no load occurs. */
4826 if (psz <= 8) {
4827 /* Reduce the pred_esz_masks value simply to reduce the
4828 * size of the code generated here.
4829 */
4830 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4831 temp = tcg_temp_new_i64();
4832 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4833 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4834 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4835 tcg_temp_free_i64(temp);
4836 } else {
4837 TCGv_i32 t32 = tcg_temp_new_i32();
4838 find_last_active(s, t32, esz, a->pg);
4839 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4840 tcg_temp_free_i32(t32);
4841 }
4842
4843 /* Load the data. */
4844 temp = tcg_temp_new_i64();
4845 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4846 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4847 s->be_data | dtype_mop[a->dtype]);
4848
4849 /* Broadcast to *all* elements. */
4850 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4851 vsz, vsz, temp);
4852 tcg_temp_free_i64(temp);
4853
4854 /* Zero the inactive elements. */
4855 gen_set_label(over);
4856 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4857 return true;
4858 }
4859
4860 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4861 int msz, int esz, int nreg)
4862 {
4863 static gen_helper_gvec_mem * const fn_single[4][4] = {
4864 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4865 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4866 { NULL, gen_helper_sve_st1hh_r,
4867 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4868 { NULL, NULL,
4869 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4870 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4871 };
4872 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4873 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4874 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4875 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4876 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4877 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4878 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4879 };
4880 gen_helper_gvec_mem *fn;
4881
4882 if (nreg == 0) {
4883 /* ST1 */
4884 fn = fn_single[msz][esz];
4885 } else {
4886 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4887 assert(msz == esz);
4888 fn = fn_multiple[nreg - 1][msz];
4889 }
4890 assert(fn != NULL);
4891 do_mem_zpa(s, zt, pg, addr, fn);
4892 }
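/* E.g. the truncating store ST1B { z0.d } has msz == 0, esz == 3 and
* selects gen_helper_sve_st1bd_r; only ST1 permits msz < esz, which is
* why fn_multiple[] is indexed by a single size.
*/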
4893
4894 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4895 {
4896 if (a->rm == 31 || a->msz > a->esz) {
4897 return false;
4898 }
4899 if (sve_access_check(s)) {
4900 TCGv_i64 addr = new_tmp_a64(s);
4901 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
4902 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4903 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4904 }
4905 return true;
4906 }
4907
4908 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4909 {
4910 if (a->msz > a->esz) {
4911 return false;
4912 }
4913 if (sve_access_check(s)) {
4914 int vsz = vec_full_reg_size(s);
4915 int elements = vsz >> a->esz;
4916 TCGv_i64 addr = new_tmp_a64(s);
4917
4918 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4919 (a->imm * elements * (a->nreg + 1)) << a->msz);
4920 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4921 }
4922 return true;
4923 }
4924
4925 /*
4926 *** SVE gather loads / scatter stores
4927 */
4928
4929 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4930 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4931 {
4932 unsigned vsz = vec_full_reg_size(s);
4933 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4934 TCGv_ptr t_zm = tcg_temp_new_ptr();
4935 TCGv_ptr t_pg = tcg_temp_new_ptr();
4936 TCGv_ptr t_zt = tcg_temp_new_ptr();
4937
4938 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4939 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4940 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4941 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4942
4943 tcg_temp_free_ptr(t_zt);
4944 tcg_temp_free_ptr(t_zm);
4945 tcg_temp_free_ptr(t_pg);
4946 tcg_temp_free_i32(desc);
4947 }
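/* The simd_desc data field carries the left shift applied to each
* offset element, so callers pass scale * msz: 0 for unscaled forms,
* log2 of the memory access size for scaled ones.
*/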
4948
4949 /* Indexed by [ff][xs][u][msz]. */
4950 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4951 { { { gen_helper_sve_ldbss_zsu,
4952 gen_helper_sve_ldhss_zsu,
4953 NULL, },
4954 { gen_helper_sve_ldbsu_zsu,
4955 gen_helper_sve_ldhsu_zsu,
4956 gen_helper_sve_ldssu_zsu, } },
4957 { { gen_helper_sve_ldbss_zss,
4958 gen_helper_sve_ldhss_zss,
4959 NULL, },
4960 { gen_helper_sve_ldbsu_zss,
4961 gen_helper_sve_ldhsu_zss,
4962 gen_helper_sve_ldssu_zss, } } },
4963
4964 { { { gen_helper_sve_ldffbss_zsu,
4965 gen_helper_sve_ldffhss_zsu,
4966 NULL, },
4967 { gen_helper_sve_ldffbsu_zsu,
4968 gen_helper_sve_ldffhsu_zsu,
4969 gen_helper_sve_ldffssu_zsu, } },
4970 { { gen_helper_sve_ldffbss_zss,
4971 gen_helper_sve_ldffhss_zss,
4972 NULL, },
4973 { gen_helper_sve_ldffbsu_zss,
4974 gen_helper_sve_ldffhsu_zss,
4975 gen_helper_sve_ldffssu_zss, } } }
4976 };
4977
4978 /* Note that we overload xs=2 to indicate 64-bit offset. */
4979 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4980 { { { gen_helper_sve_ldbds_zsu,
4981 gen_helper_sve_ldhds_zsu,
4982 gen_helper_sve_ldsds_zsu,
4983 NULL, },
4984 { gen_helper_sve_ldbdu_zsu,
4985 gen_helper_sve_ldhdu_zsu,
4986 gen_helper_sve_ldsdu_zsu,
4987 gen_helper_sve_ldddu_zsu, } },
4988 { { gen_helper_sve_ldbds_zss,
4989 gen_helper_sve_ldhds_zss,
4990 gen_helper_sve_ldsds_zss,
4991 NULL, },
4992 { gen_helper_sve_ldbdu_zss,
4993 gen_helper_sve_ldhdu_zss,
4994 gen_helper_sve_ldsdu_zss,
4995 gen_helper_sve_ldddu_zss, } },
4996 { { gen_helper_sve_ldbds_zd,
4997 gen_helper_sve_ldhds_zd,
4998 gen_helper_sve_ldsds_zd,
4999 NULL, },
5000 { gen_helper_sve_ldbdu_zd,
5001 gen_helper_sve_ldhdu_zd,
5002 gen_helper_sve_ldsdu_zd,
5003 gen_helper_sve_ldddu_zd, } } },
5004
5005 { { { gen_helper_sve_ldffbds_zsu,
5006 gen_helper_sve_ldffhds_zsu,
5007 gen_helper_sve_ldffsds_zsu,
5008 NULL, },
5009 { gen_helper_sve_ldffbdu_zsu,
5010 gen_helper_sve_ldffhdu_zsu,
5011 gen_helper_sve_ldffsdu_zsu,
5012 gen_helper_sve_ldffddu_zsu, } },
5013 { { gen_helper_sve_ldffbds_zss,
5014 gen_helper_sve_ldffhds_zss,
5015 gen_helper_sve_ldffsds_zss,
5016 NULL, },
5017 { gen_helper_sve_ldffbdu_zss,
5018 gen_helper_sve_ldffhdu_zss,
5019 gen_helper_sve_ldffsdu_zss,
5020 gen_helper_sve_ldffddu_zss, } },
5021 { { gen_helper_sve_ldffbds_zd,
5022 gen_helper_sve_ldffhds_zd,
5023 gen_helper_sve_ldffsds_zd,
5024 NULL, },
5025 { gen_helper_sve_ldffbdu_zd,
5026 gen_helper_sve_ldffhdu_zd,
5027 gen_helper_sve_ldffsdu_zd,
5028 gen_helper_sve_ldffddu_zd, } } }
5029 };
5030
5031 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5032 {
5033 gen_helper_gvec_mem_scatter *fn = NULL;
5034
5035 if (!sve_access_check(s)) {
5036 return true;
5037 }
5038
5039 switch (a->esz) {
5040 case MO_32:
5041 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5042 break;
5043 case MO_64:
5044 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5045 break;
5046 }
5047 assert(fn != NULL);
5048
5049 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5050 cpu_reg_sp(s, a->rn), fn);
5051 return true;
5052 }
5053
5054 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5055 {
5056 gen_helper_gvec_mem_scatter *fn = NULL;
5057 TCGv_i64 imm;
5058
5059 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5060 return false;
5061 }
5062 if (!sve_access_check(s)) {
5063 return true;
5064 }
5065
5066 switch (a->esz) {
5067 case MO_32:
5068 fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5069 break;
5070 case MO_64:
5071 fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5072 break;
5073 }
5074 assert(fn != NULL);
5075
5076 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5077 * by loading the immediate into the scalar parameter.
5078 */
5079 imm = tcg_const_i64(a->imm << a->msz);
5080 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5081 tcg_temp_free_i64(imm);
5082 return true;
5083 }
5084
5085 /* Indexed by [xs][msz]. */
5086 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
5087 { gen_helper_sve_stbs_zsu,
5088 gen_helper_sve_sths_zsu,
5089 gen_helper_sve_stss_zsu, },
5090 { gen_helper_sve_stbs_zss,
5091 gen_helper_sve_sths_zss,
5092 gen_helper_sve_stss_zss, },
5093 };
5094
5095 /* Note that we overload xs=2 to indicate 64-bit offset. */
5096 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
5097 { gen_helper_sve_stbd_zsu,
5098 gen_helper_sve_sthd_zsu,
5099 gen_helper_sve_stsd_zsu,
5100 gen_helper_sve_stdd_zsu, },
5101 { gen_helper_sve_stbd_zss,
5102 gen_helper_sve_sthd_zss,
5103 gen_helper_sve_stsd_zss,
5104 gen_helper_sve_stdd_zss, },
5105 { gen_helper_sve_stbd_zd,
5106 gen_helper_sve_sthd_zd,
5107 gen_helper_sve_stsd_zd,
5108 gen_helper_sve_stdd_zd, },
5109 };
5110
5111 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5112 {
5113 gen_helper_gvec_mem_scatter *fn;
5114
5115 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5116 return false;
5117 }
5118 if (!sve_access_check(s)) {
5119 return true;
5120 }
5121 switch (a->esz) {
5122 case MO_32:
5123 fn = scatter_store_fn32[a->xs][a->msz];
5124 break;
5125 case MO_64:
5126 fn = scatter_store_fn64[a->xs][a->msz];
5127 break;
5128 default:
5129 g_assert_not_reached();
5130 }
5131 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5132 cpu_reg_sp(s, a->rn), fn);
5133 return true;
5134 }
5135
5136 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5137 {
5138 gen_helper_gvec_mem_scatter *fn = NULL;
5139 TCGv_i64 imm;
5140
5141 if (a->esz < a->msz) {
5142 return false;
5143 }
5144 if (!sve_access_check(s)) {
5145 return true;
5146 }
5147
5148 switch (a->esz) {
5149 case MO_32:
5150 fn = scatter_store_fn32[0][a->msz];
5151 break;
5152 case MO_64:
5153 fn = scatter_store_fn64[2][a->msz];
5154 break;
5155 }
5156 assert(fn != NULL);
5157
5158 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5159 * by loading the immediate into the scalar parameter.
5160 */
5161 imm = tcg_const_i64(a->imm << a->msz);
5162 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5163 tcg_temp_free_i64(imm);
5164 return true;
5165 }
5166
5167 /*
5168 * Prefetches
5169 */
5170
5171 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5172 {
5173 /* Prefetch is a nop within QEMU. */
5174 (void)sve_access_check(s);
5175 return true;
5176 }
5177
5178 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5179 {
5180 if (a->rm == 31) {
5181 return false;
5182 }
5183 /* Prefetch is a nop within QEMU. */
5184 (void)sve_access_check(s);
5185 return true;
5186 }
5187
5188 /*
5189 * Move Prefix
5190 *
5191 * TODO: The implementation so far could handle predicated merging movprfx.
5192 * The helper functions as written take an extra source register to
5193 * use in the operation, but the result is only written when predication
5194 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5195 * to allow the final write back to the destination to be unconditional.
5196 * For predicated zeroing movprfx, we need to rearrange the helpers to
5197 * allow the final write back to zero inactives.
5198 *
5199 * In the meantime, just emit the moves.
5200 */
5201
5202 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
5203 {
5204 return do_mov_z(s, a->rd, a->rn);
5205 }
5206
5207 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5208 {
5209 if (sve_access_check(s)) {
5210 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5211 }
5212 return true;
5213 }
5214
5215 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5216 {
5217 if (sve_access_check(s)) {
5218 do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5219 }
5220 return true;
5221 }