/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
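
/* Worked example for the tsz:imm3 encodings above: for byte elements
 * tsz == 0b0001, so the combined field x runs from 8 to 15.  E.g. x = 13
 * gives tszimm_esz(13) = 31 - clz32(1) = 0 (MO_8), a right-shift count of
 * (16 << 0) - 13 = 3, and a left-shift count of 13 - (8 << 0) = 5.
 */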

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
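
/* E.g. x = 0x1ab: bit 8 is set, so the low byte 0xab is shifted up,
 * giving 0xffffab00 (-21760) from expand_imm_sh8s and 0x0000ab00
 * (43776) from expand_imm_sh8u; with bit 8 clear, x = 0xab would
 * yield -85 and 171 respectively.
 */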

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
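
/* E.g. a 2-byte predicate (VL = 128 bits) rounds up to 8 bytes, and an
 * 18-byte predicate (VL = 1152 bits) rounds up to 32 bytes.
 */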

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
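
/* The predtest helpers pack the PredTest flags into one value: N in
 * bit 31, "some element was active" in bit 1 and C in bit 0.  Given
 * QEMU's conventions (N is the sign bit of cpu_NF, Z is set iff
 * cpu_ZF == 0, cpu_CF holds C directly), the moves above unpack that
 * value; PredTest always yields V == 0.
 */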

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
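
/* A predicate has one bit per byte of the vector, so wider elements use
 * only every 2nd, 4th or 8th bit; e.g. for halfwords (esz == 1) the
 * 0x5555... mask selects bit 0 of each 2-bit predicate element.
 */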

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
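
/* E.g. DO_ZPZZ(ADD, add) expands to trans_ADD_zpzz, which selects
 * gen_helper_sve_add_zpzz_{b,h,s,d} by element size and hands it to
 * do_zpzz_ool above.
 */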

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
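
/* The reduction produces a scalar: write_fp_dreg stores it to the low
 * 64 bits of Vd and zeroes the remainder of the register, per the
 * usual AArch64 scalar-write semantics.
 */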

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
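
/* E.g. for byte elements, "lsr z0.b, z1.b, #8" zeroes the destination
 * outright, while "asr z0.b, z1.b, #8" is clamped to a shift by 7,
 * which produces the same result (all sign bits).
 */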

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
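
/* E.g. "index z0.s, #1, #2" fills element n of z0 with 1 + 2 * n,
 * i.e. 1, 3, 5, ... across the whole vector.
 */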

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
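
/* The psz == 8 fast path covers vector lengths up to 512 bits, where
 * the whole predicate fits in one 64-bit word and both the operation
 * and PredTest can be done inline.
 */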

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
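
/* E.g. with a 256-bit vector and word elements there are 8 elements:
 * POW2 gives 8, VL7 gives 7, VL16 gives 0 (the bound exceeds the
 * vector), MUL3 gives 6 and ALL gives 8.
 */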

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
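
/* E.g. "ptrue p0.s, vl3" at VL = 256 bits: numelem = 3, so the low
 * 3 * 4 = 12 predicate bits receive the 0x1111... word-element pattern
 * and the rest are zeroed, i.e. p0 = 0x111.
 */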

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
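
/* Since the value was extended to 64 bits first, the true result cannot
 * wrap, so a single movcond clamping against the one relevant bound is
 * enough: e.g. a decrement of an unsigned value compares LT against 0.
 */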

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
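
/* The signed cases use the usual XOR trick: for addition, overflow
 * occurred iff the operands had the same sign and the result's sign
 * differs, so (result ^ val) & ~(reg ^ val) has its top bit set; the
 * movcond then selects the saturated bound when that value is < 0.
 */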

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
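
/* E.g. with a 32-byte vector and imm == 5, the destination receives
 * bytes 5..31 of Zn followed by bytes 0..4 of Zm, i.e. a byte-granular
 * extract from the concatenation of the two registers.
 */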
1974
1975 /*
1976 *** SVE Permute - Unpredicated Group
1977 */
1978
1979 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1980 {
1981 if (sve_access_check(s)) {
1982 unsigned vsz = vec_full_reg_size(s);
1983 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1984 vsz, vsz, cpu_reg_sp(s, a->rn));
1985 }
1986 return true;
1987 }
1988
1989 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1990 {
1991 if ((a->imm & 0x1f) == 0) {
1992 return false;
1993 }
1994 if (sve_access_check(s)) {
1995 unsigned vsz = vec_full_reg_size(s);
1996 unsigned dofs = vec_full_reg_offset(s, a->rd);
1997 unsigned esz, index;
1998
1999 esz = ctz32(a->imm);
2000 index = a->imm >> (esz + 1);
2001
2002 if ((index << esz) < vsz) {
2003 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2004 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2005 } else {
2006 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2007 }
2008 }
2009 return true;
2010 }
2011
2012 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2013 {
2014 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2015 static gen_insr * const fns[4] = {
2016 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2017 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2018 };
2019 unsigned vsz = vec_full_reg_size(s);
2020 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2021 TCGv_ptr t_zd = tcg_temp_new_ptr();
2022 TCGv_ptr t_zn = tcg_temp_new_ptr();
2023
2024 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2025 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2026
2027 fns[a->esz](t_zd, t_zn, val, desc);
2028
2029 tcg_temp_free_ptr(t_zd);
2030 tcg_temp_free_ptr(t_zn);
2031 tcg_temp_free_i32(desc);
2032 }
2033
2034 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2035 {
2036 if (sve_access_check(s)) {
2037 TCGv_i64 t = tcg_temp_new_i64();
2038 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2039 do_insr_i64(s, a, t);
2040 tcg_temp_free_i64(t);
2041 }
2042 return true;
2043 }
2044
2045 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2046 {
2047 if (sve_access_check(s)) {
2048 do_insr_i64(s, a, cpu_reg(s, a->rm));
2049 }
2050 return true;
2051 }
2052
2053 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2054 {
2055 static gen_helper_gvec_2 * const fns[4] = {
2056 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2057 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2058 };
2059
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vsz, vsz, 0, fns[a->esz]);
2065 }
2066 return true;
2067 }
2068
2069 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2070 {
2071 static gen_helper_gvec_3 * const fns[4] = {
2072 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2073 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2074 };
2075
2076 if (sve_access_check(s)) {
2077 unsigned vsz = vec_full_reg_size(s);
2078 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2079 vec_full_reg_offset(s, a->rn),
2080 vec_full_reg_offset(s, a->rm),
2081 vsz, vsz, 0, fns[a->esz]);
2082 }
2083 return true;
2084 }
2085
2086 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2087 {
2088 static gen_helper_gvec_2 * const fns[4][2] = {
2089 { NULL, NULL },
2090 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2091 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2092 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2093 };
2094
2095 if (a->esz == 0) {
2096 return false;
2097 }
2098 if (sve_access_check(s)) {
2099 unsigned vsz = vec_full_reg_size(s);
2100 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2101 vec_full_reg_offset(s, a->rn)
2102 + (a->h ? vsz / 2 : 0),
2103 vsz, vsz, 0, fns[a->esz][a->u]);
2104 }
2105 return true;
2106 }
2107
2108 /*
2109 *** SVE Permute - Predicates Group
2110 */
2111
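/*
 * Note on the hand-rolled descriptor used below: predicate sizes run
 * from 2 to 256 bytes in multiples of 2, which simd_desc() cannot
 * encode, so the low bits carry (psz - 2) directly and the helpers
 * reconstruct the size by adding the 2 back.  The element size and the
 * high/odd flag are deposited at SIMD_DATA_SHIFT; e.g. ZIP2 .s at a
 * 256-bit VL (psz = 4) gives
 * desc = 2 | (2 << SIMD_DATA_SHIFT) | (1 << (SIMD_DATA_SHIFT + 2)).
 */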
2112 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2113 gen_helper_gvec_3 *fn)
2114 {
2115 if (!sve_access_check(s)) {
2116 return true;
2117 }
2118
2119 unsigned vsz = pred_full_reg_size(s);
2120
2121 /* Predicate sizes may be smaller than vector sizes, and need not be
2122 a multiple of 8, so we cannot use simd_desc. We cannot round up,
2123 as we do elsewhere, because we need the exact size for ZIP2 and
2124 REV. We retain the style of the other helpers for consistency. */
2125 TCGv_ptr t_d = tcg_temp_new_ptr();
2126 TCGv_ptr t_n = tcg_temp_new_ptr();
2127 TCGv_ptr t_m = tcg_temp_new_ptr();
2128 TCGv_i32 t_desc;
2129 int desc;
2130
2131 desc = vsz - 2;
2132 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2133 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2134
2135 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2136 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2137 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2138 t_desc = tcg_const_i32(desc);
2139
2140 fn(t_d, t_n, t_m, t_desc);
2141
2142 tcg_temp_free_ptr(t_d);
2143 tcg_temp_free_ptr(t_n);
2144 tcg_temp_free_ptr(t_m);
2145 tcg_temp_free_i32(t_desc);
2146 return true;
2147 }
2148
2149 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2150 gen_helper_gvec_2 *fn)
2151 {
2152 if (!sve_access_check(s)) {
2153 return true;
2154 }
2155
2156 unsigned vsz = pred_full_reg_size(s);
2157 TCGv_ptr t_d = tcg_temp_new_ptr();
2158 TCGv_ptr t_n = tcg_temp_new_ptr();
2159 TCGv_i32 t_desc;
2160 int desc;
2161
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164
2165 /* Predicate sizes may be smaller than vector sizes, and need not be
2166 a multiple of 8, so we cannot use simd_desc. We cannot round up,
2167 as we do elsewhere, because we need the exact size for ZIP2 and
2168 REV. We retain the style of the other helpers for consistency. */
2169
2170 desc = vsz - 2;
2171 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2172 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2173 t_desc = tcg_const_i32(desc);
2174
2175 fn(t_d, t_n, t_desc);
2176
2177 tcg_temp_free_i32(t_desc);
2178 tcg_temp_free_ptr(t_d);
2179 tcg_temp_free_ptr(t_n);
2180 return true;
2181 }
2182
2183 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2184 {
2185 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2186 }
2187
2188 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2189 {
2190 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2191 }
2192
2193 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2194 {
2195 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2196 }
2197
2198 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2199 {
2200 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2201 }
2202
2203 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2204 {
2205 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2206 }
2207
2208 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2209 {
2210 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2211 }
2212
2213 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2214 {
2215 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2216 }
2217
2218 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2219 {
2220 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2221 }
2222
2223 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2224 {
2225 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2226 }
2227
2228 /*
2229 *** SVE Permute - Interleaving Group
2230 */
2231
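/*
 * The "2" variants below operate on high halves or odd elements:
 * ZIP2 reads both sources at offset vsz/2, while UZP2/TRN2 pass
 * (1 << esz), the byte offset of element 1, as the data value to the
 * shared helper.
 */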
2232 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2233 {
2234 static gen_helper_gvec_3 * const fns[4] = {
2235 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2236 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2237 };
2238
2239 if (sve_access_check(s)) {
2240 unsigned vsz = vec_full_reg_size(s);
2241 unsigned high_ofs = high ? vsz / 2 : 0;
2242 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2243 vec_full_reg_offset(s, a->rn) + high_ofs,
2244 vec_full_reg_offset(s, a->rm) + high_ofs,
2245 vsz, vsz, 0, fns[a->esz]);
2246 }
2247 return true;
2248 }
2249
2250 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2251 gen_helper_gvec_3 *fn)
2252 {
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2256 vec_full_reg_offset(s, a->rn),
2257 vec_full_reg_offset(s, a->rm),
2258 vsz, vsz, data, fn);
2259 }
2260 return true;
2261 }
2262
2263 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2264 {
2265 return do_zip(s, a, false);
2266 }
2267
2268 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2269 {
2270 return do_zip(s, a, true);
2271 }
2272
2273 static gen_helper_gvec_3 * const uzp_fns[4] = {
2274 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2275 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2276 };
2277
2278 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2279 {
2280 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2281 }
2282
2283 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2284 {
2285 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2286 }
2287
2288 static gen_helper_gvec_3 * const trn_fns[4] = {
2289 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2290 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2291 };
2292
2293 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2294 {
2295 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2296 }
2297
2298 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2299 {
2300 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2301 }
2302
2303 /*
2304 *** SVE Permute Vector - Predicated Group
2305 */
2306
2307 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2308 {
2309 static gen_helper_gvec_3 * const fns[4] = {
2310 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2311 };
2312 return do_zpz_ool(s, a, fns[a->esz]);
2313 }
2314
2315 /* Call the helper that computes the ARM LastActiveElement pseudocode
2316 * function, scaled by the element size. This includes the not found
2317 * indication; e.g. not found for esz=3 is -8.
2318 */
2319 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2320 {
2321 /* Predicate sizes need not be a multiple of 8, so we cannot use
2322 * simd_desc. We cannot round up, as we do elsewhere, because we
2323 * need the exact size. */
2324 TCGv_ptr t_p = tcg_temp_new_ptr();
2325 TCGv_i32 t_desc;
2326 unsigned vsz = pred_full_reg_size(s);
2327 unsigned desc;
2328
2329 desc = vsz - 2;
2330 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2331
2332 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2333 t_desc = tcg_const_i32(desc);
2334
2335 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2336
2337 tcg_temp_free_i32(t_desc);
2338 tcg_temp_free_ptr(t_p);
2339 }
2340
2341 /* Increment LAST to the offset of the next element in the vector,
2342 * wrapping around to 0.
2343 */
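/*
 * The vector length need not be a power of two (any multiple of 128
 * bits up to 2048 is legal), so the general case wraps with a
 * compare-and-select rather than a mask; e.g. with vsz = 48 (384-bit
 * VL) and esz = 0, last = 47 increments to 48 and is folded back to 0.
 */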
2344 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2345 {
2346 unsigned vsz = vec_full_reg_size(s);
2347
2348 tcg_gen_addi_i32(last, last, 1 << esz);
2349 if (is_power_of_2(vsz)) {
2350 tcg_gen_andi_i32(last, last, vsz - 1);
2351 } else {
2352 TCGv_i32 max = tcg_const_i32(vsz);
2353 TCGv_i32 zero = tcg_const_i32(0);
2354 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2355 tcg_temp_free_i32(max);
2356 tcg_temp_free_i32(zero);
2357 }
2358 }
2359
2360 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2361 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2362 {
2363 unsigned vsz = vec_full_reg_size(s);
2364
2365 if (is_power_of_2(vsz)) {
2366 tcg_gen_andi_i32(last, last, vsz - 1);
2367 } else {
2368 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2369 TCGv_i32 zero = tcg_const_i32(0);
2370 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2371 tcg_temp_free_i32(max);
2372 tcg_temp_free_i32(zero);
2373 }
2374 }
2375
2376 /* Load an unsigned element of ESZ from BASE+OFS. */
2377 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2378 {
2379 TCGv_i64 r = tcg_temp_new_i64();
2380
2381 switch (esz) {
2382 case 0:
2383 tcg_gen_ld8u_i64(r, base, ofs);
2384 break;
2385 case 1:
2386 tcg_gen_ld16u_i64(r, base, ofs);
2387 break;
2388 case 2:
2389 tcg_gen_ld32u_i64(r, base, ofs);
2390 break;
2391 case 3:
2392 tcg_gen_ld_i64(r, base, ofs);
2393 break;
2394 default:
2395 g_assert_not_reached();
2396 }
2397 return r;
2398 }
2399
2400 /* Load an unsigned element of ESZ from RM[LAST]. */
2401 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2402 int rm, int esz)
2403 {
2404 TCGv_ptr p = tcg_temp_new_ptr();
2405 TCGv_i64 r;
2406
2407 /* Convert the offset within the vector into an offset within ENV.
2408 * The final adjustment for the vector register base
2409 * is added via constant offset to the load.
2410 */
2411 #ifdef HOST_WORDS_BIGENDIAN
2412 /* Adjust for element ordering. See vec_reg_offset. */
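/* E.g. for .h elements (esz=1), XOR with 6 maps the little-endian
 * byte offsets {0,2,4,6} within each 64-bit unit to the big-endian
 * host offsets {6,4,2,0}.
 */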
2413 if (esz < 3) {
2414 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2415 }
2416 #endif
2417 tcg_gen_ext_i32_ptr(p, last);
2418 tcg_gen_add_ptr(p, p, cpu_env);
2419
2420 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2421 tcg_temp_free_ptr(p);
2422
2423 return r;
2424 }
2425
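/*
 * CLASTA copies the element after the last active one, CLASTB the
 * last active element itself; hence the conditional increment below.
 * If no element is active, Zd is left holding Zn (the value supplied
 * by MOVPRFX) via the branch-over path.
 */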
2426 /* Compute CLAST for a Zreg. */
2427 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2428 {
2429 TCGv_i32 last;
2430 TCGLabel *over;
2431 TCGv_i64 ele;
2432 unsigned vsz, esz = a->esz;
2433
2434 if (!sve_access_check(s)) {
2435 return true;
2436 }
2437
2438 last = tcg_temp_local_new_i32();
2439 over = gen_new_label();
2440
2441 find_last_active(s, last, esz, a->pg);
2442
2443 /* There is of course no movcond for a 2048-bit vector,
2444 * so we must branch over the actual store.
2445 */
2446 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2447
2448 if (!before) {
2449 incr_last_active(s, last, esz);
2450 }
2451
2452 ele = load_last_active(s, last, a->rm, esz);
2453 tcg_temp_free_i32(last);
2454
2455 vsz = vec_full_reg_size(s);
2456 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2457 tcg_temp_free_i64(ele);
2458
2459 /* If this insn used MOVPRFX, we may need a second move. */
2460 if (a->rd != a->rn) {
2461 TCGLabel *done = gen_new_label();
2462 tcg_gen_br(done);
2463
2464 gen_set_label(over);
2465 do_mov_z(s, a->rd, a->rn);
2466
2467 gen_set_label(done);
2468 } else {
2469 gen_set_label(over);
2470 }
2471 return true;
2472 }
2473
2474 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2475 {
2476 return do_clast_vector(s, a, false);
2477 }
2478
2479 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2480 {
2481 return do_clast_vector(s, a, true);
2482 }
2483
2484 /* Compute CLAST for a scalar. */
2485 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2486 bool before, TCGv_i64 reg_val)
2487 {
2488 TCGv_i32 last = tcg_temp_new_i32();
2489 TCGv_i64 ele, cmp, zero;
2490
2491 find_last_active(s, last, esz, pg);
2492
2493 /* Extend the original value of last prior to incrementing. */
2494 cmp = tcg_temp_new_i64();
2495 tcg_gen_ext_i32_i64(cmp, last);
2496
2497 if (!before) {
2498 incr_last_active(s, last, esz);
2499 }
2500
2501 /* The conceit here is that while last < 0 indicates not found, after
2502 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2503 * from which we can load garbage. We then discard the garbage with
2504 * a conditional move.
2505 */
2506 ele = load_last_active(s, last, rm, esz);
2507 tcg_temp_free_i32(last);
2508
2509 zero = tcg_const_i64(0);
2510 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2511
2512 tcg_temp_free_i64(zero);
2513 tcg_temp_free_i64(cmp);
2514 tcg_temp_free_i64(ele);
2515 }
2516
2517 /* Compute CLAST for a Vreg. */
2518 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2519 {
2520 if (sve_access_check(s)) {
2521 int esz = a->esz;
2522 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2523 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2524
2525 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2526 write_fp_dreg(s, a->rd, reg);
2527 tcg_temp_free_i64(reg);
2528 }
2529 return true;
2530 }
2531
2532 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2533 {
2534 return do_clast_fp(s, a, false);
2535 }
2536
2537 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2538 {
2539 return do_clast_fp(s, a, true);
2540 }
2541
2542 /* Compute CLAST for a Xreg. */
2543 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2544 {
2545 TCGv_i64 reg;
2546
2547 if (!sve_access_check(s)) {
2548 return true;
2549 }
2550
2551 reg = cpu_reg(s, a->rd);
2552 switch (a->esz) {
2553 case 0:
2554 tcg_gen_ext8u_i64(reg, reg);
2555 break;
2556 case 1:
2557 tcg_gen_ext16u_i64(reg, reg);
2558 break;
2559 case 2:
2560 tcg_gen_ext32u_i64(reg, reg);
2561 break;
2562 case 3:
2563 break;
2564 default:
2565 g_assert_not_reached();
2566 }
2567
2568 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2569 return true;
2570 }
2571
2572 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2573 {
2574 return do_clast_general(s, a, false);
2575 }
2576
2577 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578 {
2579 return do_clast_general(s, a, true);
2580 }
2581
2582 /* Compute LAST for a scalar. */
2583 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2584 int pg, int rm, bool before)
2585 {
2586 TCGv_i32 last = tcg_temp_new_i32();
2587 TCGv_i64 ret;
2588
2589 find_last_active(s, last, esz, pg);
2590 if (before) {
2591 wrap_last_active(s, last, esz);
2592 } else {
2593 incr_last_active(s, last, esz);
2594 }
2595
2596 ret = load_last_active(s, last, rm, esz);
2597 tcg_temp_free_i32(last);
2598 return ret;
2599 }
2600
2601 /* Compute LAST for a Vreg. */
2602 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2603 {
2604 if (sve_access_check(s)) {
2605 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2606 write_fp_dreg(s, a->rd, val);
2607 tcg_temp_free_i64(val);
2608 }
2609 return true;
2610 }
2611
2612 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2613 {
2614 return do_last_fp(s, a, false);
2615 }
2616
2617 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618 {
2619 return do_last_fp(s, a, true);
2620 }
2621
2622 /* Compute LAST for a Xreg. */
2623 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2624 {
2625 if (sve_access_check(s)) {
2626 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2627 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2628 tcg_temp_free_i64(val);
2629 }
2630 return true;
2631 }
2632
2633 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2634 {
2635 return do_last_general(s, a, false);
2636 }
2637
2638 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2639 {
2640 return do_last_general(s, a, true);
2641 }
2642
2643 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2644 {
2645 if (sve_access_check(s)) {
2646 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2647 }
2648 return true;
2649 }
2650
2651 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2652 {
2653 if (sve_access_check(s)) {
2654 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2655 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2656 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2657 tcg_temp_free_i64(t);
2658 }
2659 return true;
2660 }
2661
2662 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664 static gen_helper_gvec_3 * const fns[4] = {
2665 NULL,
2666 gen_helper_sve_revb_h,
2667 gen_helper_sve_revb_s,
2668 gen_helper_sve_revb_d,
2669 };
2670 return do_zpz_ool(s, a, fns[a->esz]);
2671 }
2672
2673 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674 {
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 NULL,
2678 gen_helper_sve_revh_s,
2679 gen_helper_sve_revh_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682 }
2683
2684 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2687 }
2688
2689 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 gen_helper_sve_rbit_b,
2693 gen_helper_sve_rbit_h,
2694 gen_helper_sve_rbit_s,
2695 gen_helper_sve_rbit_d,
2696 };
2697 return do_zpz_ool(s, a, fns[a->esz]);
2698 }
2699
2700 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2701 {
2702 if (sve_access_check(s)) {
2703 unsigned vsz = vec_full_reg_size(s);
2704 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2705 vec_full_reg_offset(s, a->rn),
2706 vec_full_reg_offset(s, a->rm),
2707 pred_full_reg_offset(s, a->pg),
2708 vsz, vsz, a->esz, gen_helper_sve_splice);
2709 }
2710 return true;
2711 }
2712
2713 /*
2714 *** SVE Integer Compare - Vectors Group
2715 */
2716
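/*
 * The compare helpers return the predicate-test result in their i32
 * argument, which do_pred_flags() unpacks into NF/ZF/CF/VF; below, T
 * is reused as both the descriptor input and the flags output to save
 * a temporary.
 */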
2717 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2718 gen_helper_gvec_flags_4 *gen_fn)
2719 {
2720 TCGv_ptr pd, zn, zm, pg;
2721 unsigned vsz;
2722 TCGv_i32 t;
2723
2724 if (gen_fn == NULL) {
2725 return false;
2726 }
2727 if (!sve_access_check(s)) {
2728 return true;
2729 }
2730
2731 vsz = vec_full_reg_size(s);
2732 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2733 pd = tcg_temp_new_ptr();
2734 zn = tcg_temp_new_ptr();
2735 zm = tcg_temp_new_ptr();
2736 pg = tcg_temp_new_ptr();
2737
2738 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2739 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2740 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2741 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2742
2743 gen_fn(t, pd, zn, zm, pg, t);
2744
2745 tcg_temp_free_ptr(pd);
2746 tcg_temp_free_ptr(zn);
2747 tcg_temp_free_ptr(zm);
2748 tcg_temp_free_ptr(pg);
2749
2750 do_pred_flags(t);
2751
2752 tcg_temp_free_i32(t);
2753 return true;
2754 }
2755
2756 #define DO_PPZZ(NAME, name) \
2757 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2758 uint32_t insn) \
2759 { \
2760 static gen_helper_gvec_flags_4 * const fns[4] = { \
2761 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2762 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2763 }; \
2764 return do_ppzz_flags(s, a, fns[a->esz]); \
2765 }
2766
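/* E.g. DO_PPZZ(CMPEQ, cmpeq) defines trans_CMPEQ_ppzz(), dispatching
 * to gen_helper_sve_cmpeq_ppzz_{b,h,s,d} by element size.
 */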
2767 DO_PPZZ(CMPEQ, cmpeq)
2768 DO_PPZZ(CMPNE, cmpne)
2769 DO_PPZZ(CMPGT, cmpgt)
2770 DO_PPZZ(CMPGE, cmpge)
2771 DO_PPZZ(CMPHI, cmphi)
2772 DO_PPZZ(CMPHS, cmphs)
2773
2774 #undef DO_PPZZ
2775
2776 #define DO_PPZW(NAME, name) \
2777 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2778 uint32_t insn) \
2779 { \
2780 static gen_helper_gvec_flags_4 * const fns[4] = { \
2781 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2782 gen_helper_sve_##name##_ppzw_s, NULL \
2783 }; \
2784 return do_ppzz_flags(s, a, fns[a->esz]); \
2785 }
2786
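/*
 * The wide forms compare each element of Zn against the overlapping
 * 64-bit element of Zm.  A doubleword variant would be identical to
 * the vector form above, so esz == 3 is unallocated (NULL).
 */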
2787 DO_PPZW(CMPEQ, cmpeq)
2788 DO_PPZW(CMPNE, cmpne)
2789 DO_PPZW(CMPGT, cmpgt)
2790 DO_PPZW(CMPGE, cmpge)
2791 DO_PPZW(CMPHI, cmphi)
2792 DO_PPZW(CMPHS, cmphs)
2793 DO_PPZW(CMPLT, cmplt)
2794 DO_PPZW(CMPLE, cmple)
2795 DO_PPZW(CMPLO, cmplo)
2796 DO_PPZW(CMPLS, cmpls)
2797
2798 #undef DO_PPZW
2799
2800 /*
2801 *** SVE Integer Compare - Immediate Groups
2802 */
2803
2804 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2805 gen_helper_gvec_flags_3 *gen_fn)
2806 {
2807 TCGv_ptr pd, zn, pg;
2808 unsigned vsz;
2809 TCGv_i32 t;
2810
2811 if (gen_fn == NULL) {
2812 return false;
2813 }
2814 if (!sve_access_check(s)) {
2815 return true;
2816 }
2817
2818 vsz = vec_full_reg_size(s);
2819 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2820 pd = tcg_temp_new_ptr();
2821 zn = tcg_temp_new_ptr();
2822 pg = tcg_temp_new_ptr();
2823
2824 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2825 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2826 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2827
2828 gen_fn(t, pd, zn, pg, t);
2829
2830 tcg_temp_free_ptr(pd);
2831 tcg_temp_free_ptr(zn);
2832 tcg_temp_free_ptr(pg);
2833
2834 do_pred_flags(t);
2835
2836 tcg_temp_free_i32(t);
2837 return true;
2838 }
2839
2840 #define DO_PPZI(NAME, name) \
2841 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2842 uint32_t insn) \
2843 { \
2844 static gen_helper_gvec_flags_3 * const fns[4] = { \
2845 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2846 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2847 }; \
2848 return do_ppzi_flags(s, a, fns[a->esz]); \
2849 }
2850
2851 DO_PPZI(CMPEQ, cmpeq)
2852 DO_PPZI(CMPNE, cmpne)
2853 DO_PPZI(CMPGT, cmpgt)
2854 DO_PPZI(CMPGE, cmpge)
2855 DO_PPZI(CMPHI, cmphi)
2856 DO_PPZI(CMPHS, cmphs)
2857 DO_PPZI(CMPLT, cmplt)
2858 DO_PPZI(CMPLE, cmple)
2859 DO_PPZI(CMPLO, cmplo)
2860 DO_PPZI(CMPLS, cmpls)
2861
2862 #undef DO_PPZI
2863
2864 /*
2865 *** SVE Partition Break Group
2866 */
2867
2868 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2869 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2870 {
2871 if (!sve_access_check(s)) {
2872 return true;
2873 }
2874
2875 unsigned vsz = pred_full_reg_size(s);
2876
2877 /* Predicate sizes need not be a multiple of 8, so we cannot use simd_desc. */
2878 TCGv_ptr d = tcg_temp_new_ptr();
2879 TCGv_ptr n = tcg_temp_new_ptr();
2880 TCGv_ptr m = tcg_temp_new_ptr();
2881 TCGv_ptr g = tcg_temp_new_ptr();
2882 TCGv_i32 t = tcg_const_i32(vsz - 2);
2883
2884 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2885 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2886 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2887 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2888
2889 if (a->s) {
2890 fn_s(t, d, n, m, g, t);
2891 do_pred_flags(t);
2892 } else {
2893 fn(d, n, m, g, t);
2894 }
2895 tcg_temp_free_ptr(d);
2896 tcg_temp_free_ptr(n);
2897 tcg_temp_free_ptr(m);
2898 tcg_temp_free_ptr(g);
2899 tcg_temp_free_i32(t);
2900 return true;
2901 }
2902
2903 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2904 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2905 {
2906 if (!sve_access_check(s)) {
2907 return true;
2908 }
2909
2910 unsigned vsz = pred_full_reg_size(s);
2911
2912 /* Predicate sizes need not be a multiple of 8, so we cannot use simd_desc. */
2913 TCGv_ptr d = tcg_temp_new_ptr();
2914 TCGv_ptr n = tcg_temp_new_ptr();
2915 TCGv_ptr g = tcg_temp_new_ptr();
2916 TCGv_i32 t = tcg_const_i32(vsz - 2);
2917
2918 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2919 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2920 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2921
2922 if (a->s) {
2923 fn_s(t, d, n, g, t);
2924 do_pred_flags(t);
2925 } else {
2926 fn(d, n, g, t);
2927 }
2928 tcg_temp_free_ptr(d);
2929 tcg_temp_free_ptr(n);
2930 tcg_temp_free_ptr(g);
2931 tcg_temp_free_i32(t);
2932 return true;
2933 }
2934
2935 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2936 {
2937 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2938 }
2939
2940 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2941 {
2942 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2943 }
2944
2945 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2946 {
2947 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2948 }
2949
2950 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2951 {
2952 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2953 }
2954
2955 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2956 {
2957 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2958 }
2959
2960 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2961 {
2962 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2963 }
2964
2965 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2966 {
2967 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2968 }
2969
2970 /*
2971 *** SVE Predicate Count Group
2972 */
2973
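/*
 * Two strategies: when the whole predicate fits in one 64-bit load
 * (psz <= 8, i.e. VL <= 512) the count is done inline by masking to
 * one bit per element and using ctpop; e.g. pred_esz_masks[2] is
 * 0x1111111111111111ull, one predicate bit per .s element.  Larger
 * predicates go out of line, with the same hand-rolled (psz - 2)
 * descriptor used by the predicate permute helpers.
 */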
2974 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2975 {
2976 unsigned psz = pred_full_reg_size(s);
2977
2978 if (psz <= 8) {
2979 uint64_t psz_mask;
2980
2981 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2982 if (pn != pg) {
2983 TCGv_i64 g = tcg_temp_new_i64();
2984 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2985 tcg_gen_and_i64(val, val, g);
2986 tcg_temp_free_i64(g);
2987 }
2988
2989 /* Narrow the pred_esz_masks constant to the psz bytes actually
2990 * present, simply to reduce the size of the code generated here.
2991 */
2992 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2993 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2994
2995 tcg_gen_ctpop_i64(val, val);
2996 } else {
2997 TCGv_ptr t_pn = tcg_temp_new_ptr();
2998 TCGv_ptr t_pg = tcg_temp_new_ptr();
2999 unsigned desc;
3000 TCGv_i32 t_desc;
3001
3002 desc = psz - 2;
3003 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3004
3005 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3006 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3007 t_desc = tcg_const_i32(desc);
3008
3009 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3010 tcg_temp_free_ptr(t_pn);
3011 tcg_temp_free_ptr(t_pg);
3012 tcg_temp_free_i32(t_desc);
3013 }
3014 }
3015
3016 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3017 {
3018 if (sve_access_check(s)) {
3019 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3020 }
3021 return true;
3022 }
3023
3024 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3025 uint32_t insn)
3026 {
3027 if (sve_access_check(s)) {
3028 TCGv_i64 reg = cpu_reg(s, a->rd);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030
3031 do_cntp(s, val, a->esz, a->pg, a->pg);
3032 if (a->d) {
3033 tcg_gen_sub_i64(reg, reg, val);
3034 } else {
3035 tcg_gen_add_i64(reg, reg, val);
3036 }
3037 tcg_temp_free_i64(val);
3038 }
3039 return true;
3040 }
3041
3042 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3043 uint32_t insn)
3044 {
3045 if (a->esz == 0) {
3046 return false;
3047 }
3048 if (sve_access_check(s)) {
3049 unsigned vsz = vec_full_reg_size(s);
3050 TCGv_i64 val = tcg_temp_new_i64();
3051 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3052
3053 do_cntp(s, val, a->esz, a->pg, a->pg);
3054 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3055 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3056 }
3057 return true;
3058 }
3059
3060 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3061 uint32_t insn)
3062 {
3063 if (sve_access_check(s)) {
3064 TCGv_i64 reg = cpu_reg(s, a->rd);
3065 TCGv_i64 val = tcg_temp_new_i64();
3066
3067 do_cntp(s, val, a->esz, a->pg, a->pg);
3068 do_sat_addsub_32(reg, val, a->u, a->d);
3069 }
3070 return true;
3071 }
3072
3073 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3074 uint32_t insn)
3075 {
3076 if (sve_access_check(s)) {
3077 TCGv_i64 reg = cpu_reg(s, a->rd);
3078 TCGv_i64 val = tcg_temp_new_i64();
3079
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 do_sat_addsub_64(reg, val, a->u, a->d);
3082 }
3083 return true;
3084 }
3085
3086 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3087 uint32_t insn)
3088 {
3089 if (a->esz == 0) {
3090 return false;
3091 }
3092 if (sve_access_check(s)) {
3093 TCGv_i64 val = tcg_temp_new_i64();
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3096 }
3097 return true;
3098 }
3099
3100 /*
3101 *** SVE Integer Compare Scalars Group
3102 */
3103
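/*
 * CTERMEQ/CTERMNE update only N and V: N is the comparison result and
 * V = !N && !C, with C and Z left over from the previous flag-setting
 * predicate operation, so that a single conditional branch can test
 * both loop-termination conditions.
 */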
3104 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3105 {
3106 if (!sve_access_check(s)) {
3107 return true;
3108 }
3109
3110 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3111 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3112 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3113 TCGv_i64 cmp = tcg_temp_new_i64();
3114
3115 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3116 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3117 tcg_temp_free_i64(cmp);
3118
3119 /* VF = !NF & !CF. */
3120 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3121 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3122
3123 /* Both NF and VF actually look at bit 31. */
3124 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3125 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3126 return true;
3127 }
3128
3129 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3130 {
3131 if (!sve_access_check(s)) {
3132 return true;
3133 }
3134
3135 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3136 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3137 TCGv_i64 t0 = tcg_temp_new_i64();
3138 TCGv_i64 t1 = tcg_temp_new_i64();
3139 TCGv_i32 t2, t3;
3140 TCGv_ptr ptr;
3141 unsigned desc, vsz = vec_full_reg_size(s);
3142 TCGCond cond;
3143
3144 if (!a->sf) {
3145 if (a->u) {
3146 tcg_gen_ext32u_i64(op0, op0);
3147 tcg_gen_ext32u_i64(op1, op1);
3148 } else {
3149 tcg_gen_ext32s_i64(op0, op0);
3150 tcg_gen_ext32s_i64(op1, op1);
3151 }
3152 }
3153
3154 /* For the helper, compress the different conditions into a computation
3155 * of how many iterations for which the condition is true.
3156 *
3157 * This is slightly complicated by the case 0 <= UINT64_MAX, nominally
3158 * 2**64 iterations, which overflows to 0. Of course, predicate registers
3159 * aren't that large, so any count >= the predicate size is sufficient.
3160 */
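/*
 * Worked example: WHILELT with x(rn)=5, x(rm)=9, .s elements, 256-bit
 * VL: t0 = 9 - 5 = 4, min(4, 32) = 4, the signed LT condition holds,
 * so the helper sets the first four .s elements of Pd.
 */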
3161 tcg_gen_sub_i64(t0, op1, op0);
3162
3163 /* t0 = MIN(op1 - op0, vsz). */
3164 tcg_gen_movi_i64(t1, vsz);
3165 tcg_gen_umin_i64(t0, t0, t1);
3166 if (a->eq) {
3167 /* Equality means one more iteration. */
3168 tcg_gen_addi_i64(t0, t0, 1);
3169 }
3170
3171 /* t0 = (condition true ? t0 : 0). */
3172 cond = (a->u
3173 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3174 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3175 tcg_gen_movi_i64(t1, 0);
3176 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3177
3178 t2 = tcg_temp_new_i32();
3179 tcg_gen_extrl_i64_i32(t2, t0);
3180 tcg_temp_free_i64(t0);
3181 tcg_temp_free_i64(t1);
3182
3183 desc = (vsz / 8) - 2;
3184 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3185 t3 = tcg_const_i32(desc);
3186
3187 ptr = tcg_temp_new_ptr();
3188 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3189
3190 gen_helper_sve_while(t2, ptr, t2, t3);
3191 do_pred_flags(t2);
3192
3193 tcg_temp_free_ptr(ptr);
3194 tcg_temp_free_i32(t2);
3195 tcg_temp_free_i32(t3);
3196 return true;
3197 }
3198
3199 /*
3200 *** SVE Integer Wide Immediate - Unpredicated Group
3201 */
3202
3203 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3204 {
3205 if (a->esz == 0) {
3206 return false;
3207 }
3208 if (sve_access_check(s)) {
3209 unsigned vsz = vec_full_reg_size(s);
3210 int dofs = vec_full_reg_offset(s, a->rd);
3211 uint64_t imm;
3212
3213 /* Decode the VFP immediate. */
3214 imm = vfp_expand_imm(a->esz, a->imm);
3215 imm = dup_const(a->esz, imm);
3216
3217 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3218 }
3219 return true;
3220 }
3221
3222 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3223 {
3224 if (a->esz == 0 && extract32(insn, 13, 1)) {
3225 return false;
3226 }
3227 if (sve_access_check(s)) {
3228 unsigned vsz = vec_full_reg_size(s);
3229 int dofs = vec_full_reg_offset(s, a->rd);
3230
3231 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3232 }
3233 return true;
3234 }
3235
3236 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3237 {
3238 if (a->esz == 0 && extract32(insn, 13, 1)) {
3239 return false;
3240 }
3241 if (sve_access_check(s)) {
3242 unsigned vsz = vec_full_reg_size(s);
3243 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3244 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3245 }
3246 return true;
3247 }
3248
3249 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3250 {
3251 a->imm = -a->imm;
3252 return trans_ADD_zzi(s, a, insn);
3253 }
3254
3255 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3256 {
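/* SUBR is a reversed subtract, computing imm - Zn; .scalar_first
 * below makes the gvec expansion use the scalar as the first operand.
 */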
3257 static const GVecGen2s op[4] = {
3258 { .fni8 = tcg_gen_vec_sub8_i64,
3259 .fniv = tcg_gen_sub_vec,
3260 .fno = gen_helper_sve_subri_b,
3261 .opc = INDEX_op_sub_vec,
3262 .vece = MO_8,
3263 .scalar_first = true },
3264 { .fni8 = tcg_gen_vec_sub16_i64,
3265 .fniv = tcg_gen_sub_vec,
3266 .fno = gen_helper_sve_subri_h,
3267 .opc = INDEX_op_sub_vec,
3268 .vece = MO_16,
3269 .scalar_first = true },
3270 { .fni4 = tcg_gen_sub_i32,
3271 .fniv = tcg_gen_sub_vec,
3272 .fno = gen_helper_sve_subri_s,
3273 .opc = INDEX_op_sub_vec,
3274 .vece = MO_32,
3275 .scalar_first = true },
3276 { .fni8 = tcg_gen_sub_i64,
3277 .fniv = tcg_gen_sub_vec,
3278 .fno = gen_helper_sve_subri_d,
3279 .opc = INDEX_op_sub_vec,
3280 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3281 .vece = MO_64,
3282 .scalar_first = true }
3283 };
3284
3285 if (a->esz == 0 && extract32(insn, 13, 1)) {
3286 return false;
3287 }
3288 if (sve_access_check(s)) {
3289 unsigned vsz = vec_full_reg_size(s);
3290 TCGv_i64 c = tcg_const_i64(a->imm);
3291 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3292 vec_full_reg_offset(s, a->rn),
3293 vsz, vsz, c, &op[a->esz]);
3294 tcg_temp_free_i64(c);
3295 }
3296 return true;
3297 }
3298
3299 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3300 {
3301 if (sve_access_check(s)) {
3302 unsigned vsz = vec_full_reg_size(s);
3303 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3304 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3305 }
3306 return true;
3307 }
3308
3309 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3310 bool u, bool d)
3311 {
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3313 return false;
3314 }
3315 if (sve_access_check(s)) {
3316 TCGv_i64 val = tcg_const_i64(a->imm);
3317 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3318 tcg_temp_free_i64(val);
3319 }
3320 return true;
3321 }
3322
3323 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3324 {
3325 return do_zzi_sat(s, a, insn, false, false);
3326 }
3327
3328 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3329 {
3330 return do_zzi_sat(s, a, insn, true, false);
3331 }
3332
3333 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3334 {
3335 return do_zzi_sat(s, a, insn, false, true);
3336 }
3337
3338 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3339 {
3340 return do_zzi_sat(s, a, insn, true, true);
3341 }
3342
3343 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3344 {
3345 if (sve_access_check(s)) {
3346 unsigned vsz = vec_full_reg_size(s);
3347 TCGv_i64 c = tcg_const_i64(a->imm);
3348
3349 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3350 vec_full_reg_offset(s, a->rn),
3351 c, vsz, vsz, 0, fn);
3352 tcg_temp_free_i64(c);
3353 }
3354 return true;
3355 }
3356
3357 #define DO_ZZI(NAME, name) \
3358 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3359 uint32_t insn) \
3360 { \
3361 static gen_helper_gvec_2i * const fns[4] = { \
3362 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3363 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3364 }; \
3365 return do_zzi_ool(s, a, fns[a->esz]); \
3366 }
3367
3368 DO_ZZI(SMAX, smax)
3369 DO_ZZI(UMAX, umax)
3370 DO_ZZI(SMIN, smin)
3371 DO_ZZI(UMIN, umin)
3372
3373 #undef DO_ZZI
3374
3375 /*
3376 *** SVE Floating Point Arithmetic - Unpredicated Group
3377 */
3378
3379 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3380 gen_helper_gvec_3_ptr *fn)
3381 {
3382 if (fn == NULL) {
3383 return false;
3384 }
3385 if (sve_access_check(s)) {
3386 unsigned vsz = vec_full_reg_size(s);
3387 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3388 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3389 vec_full_reg_offset(s, a->rn),
3390 vec_full_reg_offset(s, a->rm),
3391 status, vsz, vsz, 0, fn);
3392 tcg_temp_free_ptr(status);
3393 }
3394 return true;
3395 }
3396
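/*
 * Half-precision operations use the separate FP16 float_status (with
 * its own flush-to-zero control), hence get_fpstatus_ptr(esz == MO_16)
 * above.
 */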
3397
3398 #define DO_FP3(NAME, name) \
3399 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3400 { \
3401 static gen_helper_gvec_3_ptr * const fns[4] = { \
3402 NULL, gen_helper_gvec_##name##_h, \
3403 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3404 }; \
3405 return do_zzz_fp(s, a, fns[a->esz]); \
3406 }
3407
3408 DO_FP3(FADD_zzz, fadd)
3409 DO_FP3(FSUB_zzz, fsub)
3410 DO_FP3(FMUL_zzz, fmul)
3411 DO_FP3(FTSMUL, ftsmul)
3412 DO_FP3(FRECPS, recps)
3413 DO_FP3(FRSQRTS, rsqrts)
3414
3415 #undef DO_FP3
3416
3417 /*
3418 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3419 */
3420
3421 /* Subroutine loading a vector register at VOFS of LEN bytes.
3422 * The load should begin at the address Rn + IMM.
3423 */
3424
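/*
 * Strategy: when the transfer breaks into at most four pieces it is
 * fully unrolled; larger sizes use a TCG loop over the 8-byte-aligned
 * portion.  A tail occurs only for predicate loads, whose size is any
 * multiple of 2; e.g. a 1408-bit VL gives a 22-byte predicate, split
 * as 16 aligned bytes plus a 6-byte remainder, the latter loaded as
 * 4 bytes plus 2 bytes below.
 */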
3425 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3426 int rn, int imm)
3427 {
3428 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3429 uint32_t len_remain = len % 8;
3430 uint32_t nparts = len / 8 + ctpop8(len_remain);
3431 int midx = get_mem_index(s);
3432 TCGv_i64 addr, t0, t1;
3433
3434 addr = tcg_temp_new_i64();
3435 t0 = tcg_temp_new_i64();
3436
3437 /* Note that unpredicated load/store of vector/predicate registers
3438 * are defined as a stream of bytes, which equates to little-endian
3439 * operations on larger quantities. There is no nice way to force
3440 * a little-endian load for aarch64_be-linux-user out of line.
3441 *
3442 * Attempt to keep code expansion to a minimum by limiting the
3443 * amount of unrolling done.
3444 */
3445 if (nparts <= 4) {
3446 int i;
3447
3448 for (i = 0; i < len_align; i += 8) {
3449 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3450 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3451 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3452 }
3453 } else {
3454 TCGLabel *loop = gen_new_label();
3455 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3456
3457 gen_set_label(loop);
3458
3459 /* Minimize the number of local temps that must be re-read from
3460 * the stack each iteration. Instead, re-compute values other
3461 * than the loop counter.
3462 */
3463 tp = tcg_temp_new_ptr();
3464 tcg_gen_addi_ptr(tp, i, imm);
3465 tcg_gen_extu_ptr_i64(addr, tp);
3466 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3467
3468 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3469
3470 tcg_gen_add_ptr(tp, cpu_env, i);
3471 tcg_gen_addi_ptr(i, i, 8);
3472 tcg_gen_st_i64(t0, tp, vofs);
3473 tcg_temp_free_ptr(tp);
3474
3475 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3476 tcg_temp_free_ptr(i);
3477 }
3478
3479 /* Predicate register loads can be any multiple of 2.
3480 * Note that we still store the entire 64-bit unit into cpu_env.
3481 */
3482 if (len_remain) {
3483 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3484
3485 switch (len_remain) {
3486 case 2:
3487 case 4:
3488 case 8:
3489 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3490 break;
3491
3492 case 6:
3493 t1 = tcg_temp_new_i64();
3494 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3495 tcg_gen_addi_i64(addr, addr, 4);
3496 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3497 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3498 tcg_temp_free_i64(t1);
3499 break;
3500
3501 default:
3502 g_assert_not_reached();
3503 }
3504 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3505 }
3506 tcg_temp_free_i64(addr);
3507 tcg_temp_free_i64(t0);
3508 }
3509
3510 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3511 {
3512 if (sve_access_check(s)) {
3513 int size = vec_full_reg_size(s);
3514 int off = vec_full_reg_offset(s, a->rd);
3515 do_ldr(s, off, size, a->rn, a->imm * size);
3516 }
3517 return true;
3518 }
3519
3520 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3521 {
3522 if (sve_access_check(s)) {
3523 int size = pred_full_reg_size(s);
3524 int off = pred_full_reg_offset(s, a->rd);
3525 do_ldr(s, off, size, a->rn, a->imm * size);
3526 }
3527 return true;
3528 }