1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35
36
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
45 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
46
47 /*
48 * Helpers for extracting complex instruction fields.
49 */
50
51 /* See e.g. ASR (immediate, predicated).
52  * Returns -1 for an unallocated encoding; diagnose later.
53 */
54 static int tszimm_esz(int x)
55 {
56 x >>= 3; /* discard imm3 */
57 return 31 - clz32(x);
58 }
59
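/* See e.g. LSR (immediate, predicated): the field encodes (2 * esize) - shift. */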
60 static int tszimm_shr(int x)
61 {
62 return (16 << tszimm_esz(x)) - x;
63 }
64
65 /* See e.g. LSL (immediate, predicated). */
66 static int tszimm_shl(int x)
67 {
68 return x - (8 << tszimm_esz(x));
69 }
70
71 static inline int plus1(int x)
72 {
73 return x + 1;
74 }
75
76 /* The SH bit is in bit 8. Extract the low 8 and shift. */
77 static inline int expand_imm_sh8s(int x)
78 {
79 return (int8_t)x << (x & 0x100 ? 8 : 0);
80 }
81
82 static inline int expand_imm_sh8u(int x)
83 {
84 return (uint8_t)x << (x & 0x100 ? 8 : 0);
85 }
86
87 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
88  * with unsigned data. Cf. SVE Memory Contiguous Load Group.
89 */
90 static inline int msz_dtype(int msz)
91 {
92 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
93 return dtype[msz];
94 }
95
96 /*
97 * Include the generated decoder.
98 */
99
100 #include "decode-sve.inc.c"
101
102 /*
103 * Implement all of the translator functions referenced by the decoder.
104 */
105
106 /* Return the offset into CPUARMState of the predicate vector register Pn.
107 * Note for this purpose, FFR is P16.
108 */
109 static inline int pred_full_reg_offset(DisasContext *s, int regno)
110 {
111 return offsetof(CPUARMState, vfp.pregs[regno]);
112 }
113
114 /* Return the byte size of the whole predicate register, VL / 64. */
115 static inline int pred_full_reg_size(DisasContext *s)
116 {
117 return s->sve_len >> 3;
118 }
119
120 /* Round up the size of a register to a size allowed by
121 * the tcg vector infrastructure. Any operation which uses this
122 * size may assume that the bits above pred_full_reg_size are zero,
123  * and must leave them that way.
124 *
125 * Note that this is not needed for the vector registers as they
126 * are always properly sized for tcg vectors.
127 */
128 static int size_for_gvec(int size)
129 {
130 if (size <= 8) {
131 return 8;
132 } else {
133 return QEMU_ALIGN_UP(size, 16);
134 }
135 }
136
137 static int pred_gvec_reg_size(DisasContext *s)
138 {
139 return size_for_gvec(pred_full_reg_size(s));
140 }
141
142 /* Invoke a vector expander on two Zregs. */
143 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
144 int esz, int rd, int rn)
145 {
146 if (sve_access_check(s)) {
147 unsigned vsz = vec_full_reg_size(s);
148 gvec_fn(esz, vec_full_reg_offset(s, rd),
149 vec_full_reg_offset(s, rn), vsz, vsz);
150 }
151 return true;
152 }
153
154 /* Invoke a vector expander on three Zregs. */
155 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
156 int esz, int rd, int rn, int rm)
157 {
158 if (sve_access_check(s)) {
159 unsigned vsz = vec_full_reg_size(s);
160 gvec_fn(esz, vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm), vsz, vsz);
163 }
164 return true;
165 }
166
167 /* Invoke a vector move on two Zregs. */
168 static bool do_mov_z(DisasContext *s, int rd, int rn)
169 {
170 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
171 }
172
173 /* Initialize a Zreg with replications of a 64-bit immediate. */
174 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
175 {
176 unsigned vsz = vec_full_reg_size(s);
177 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
178 }
179
180 /* Invoke a vector expander on two Pregs. */
181 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
182 int esz, int rd, int rn)
183 {
184 if (sve_access_check(s)) {
185 unsigned psz = pred_gvec_reg_size(s);
186 gvec_fn(esz, pred_full_reg_offset(s, rd),
187 pred_full_reg_offset(s, rn), psz, psz);
188 }
189 return true;
190 }
191
192 /* Invoke a vector expander on three Pregs. */
193 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
194 int esz, int rd, int rn, int rm)
195 {
196 if (sve_access_check(s)) {
197 unsigned psz = pred_gvec_reg_size(s);
198 gvec_fn(esz, pred_full_reg_offset(s, rd),
199 pred_full_reg_offset(s, rn),
200 pred_full_reg_offset(s, rm), psz, psz);
201 }
202 return true;
203 }
204
205 /* Invoke a vector operation on four Pregs. */
206 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
207 int rd, int rn, int rm, int rg)
208 {
209 if (sve_access_check(s)) {
210 unsigned psz = pred_gvec_reg_size(s);
211 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
212 pred_full_reg_offset(s, rn),
213 pred_full_reg_offset(s, rm),
214 pred_full_reg_offset(s, rg),
215 psz, psz, gvec_op);
216 }
217 return true;
218 }
219
220 /* Invoke a vector move on two Pregs. */
221 static bool do_mov_p(DisasContext *s, int rd, int rn)
222 {
223 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
224 }
225
226 /* Set the cpu flags as per a return from an SVE helper. */
227 static void do_pred_flags(TCGv_i32 t)
228 {
229 tcg_gen_mov_i32(cpu_NF, t);
230 tcg_gen_andi_i32(cpu_ZF, t, 2);
231 tcg_gen_andi_i32(cpu_CF, t, 1);
232 tcg_gen_movi_i32(cpu_VF, 0);
233 }
234
235 /* Subroutines computing the ARM PredTest pseudofunction. */
236 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
237 {
238 TCGv_i32 t = tcg_temp_new_i32();
239
240 gen_helper_sve_predtest1(t, d, g);
241 do_pred_flags(t);
242 tcg_temp_free_i32(t);
243 }
244
245 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
246 {
247 TCGv_ptr dptr = tcg_temp_new_ptr();
248 TCGv_ptr gptr = tcg_temp_new_ptr();
249 TCGv_i32 t;
250
251 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
252 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
253 t = tcg_const_i32(words);
254
255 gen_helper_sve_predtest(t, dptr, gptr, t);
256 tcg_temp_free_ptr(dptr);
257 tcg_temp_free_ptr(gptr);
258
259 do_pred_flags(t);
260 tcg_temp_free_i32(t);
261 }
262
263 /* For each element size, the bits within a predicate word that are active. */
264 const uint64_t pred_esz_masks[4] = {
265 0xffffffffffffffffull, 0x5555555555555555ull,
266 0x1111111111111111ull, 0x0101010101010101ull
267 };
268
269 /*
270 *** SVE Logical - Unpredicated Group
271 */
272
273 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
274 {
275 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
276 }
277
278 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
279 {
280 if (a->rn == a->rm) { /* MOV */
281 return do_mov_z(s, a->rd, a->rn);
282 } else {
283 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
284 }
285 }
286
287 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
288 {
289 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
290 }
291
292 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
293 {
294 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
295 }
296
297 /*
298 *** SVE Integer Arithmetic - Unpredicated Group
299 */
300
301 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
302 {
303 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
304 }
305
306 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
307 {
308 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
309 }
310
311 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
312 {
313 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
314 }
315
316 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
317 {
318 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
319 }
320
321 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
322 {
323 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
324 }
325
326 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
327 {
328 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
329 }
330
331 /*
332 *** SVE Integer Arithmetic - Binary Predicated Group
333 */
334
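/* Expand a predicated operation on two Zregs via an out-of-line helper. */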
335 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
336 {
337 unsigned vsz = vec_full_reg_size(s);
338 if (fn == NULL) {
339 return false;
340 }
341 if (sve_access_check(s)) {
342 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
343 vec_full_reg_offset(s, a->rn),
344 vec_full_reg_offset(s, a->rm),
345 pred_full_reg_offset(s, a->pg),
346 vsz, vsz, 0, fn);
347 }
348 return true;
349 }
350
351 #define DO_ZPZZ(NAME, name) \
352 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
353 uint32_t insn) \
354 { \
355 static gen_helper_gvec_4 * const fns[4] = { \
356 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
357 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
358 }; \
359 return do_zpzz_ool(s, a, fns[a->esz]); \
360 }
361
362 DO_ZPZZ(AND, and)
363 DO_ZPZZ(EOR, eor)
364 DO_ZPZZ(ORR, orr)
365 DO_ZPZZ(BIC, bic)
366
367 DO_ZPZZ(ADD, add)
368 DO_ZPZZ(SUB, sub)
369
370 DO_ZPZZ(SMAX, smax)
371 DO_ZPZZ(UMAX, umax)
372 DO_ZPZZ(SMIN, smin)
373 DO_ZPZZ(UMIN, umin)
374 DO_ZPZZ(SABD, sabd)
375 DO_ZPZZ(UABD, uabd)
376
377 DO_ZPZZ(MUL, mul)
378 DO_ZPZZ(SMULH, smulh)
379 DO_ZPZZ(UMULH, umulh)
380
381 DO_ZPZZ(ASR, asr)
382 DO_ZPZZ(LSR, lsr)
383 DO_ZPZZ(LSL, lsl)
384
385 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
386 {
387 static gen_helper_gvec_4 * const fns[4] = {
388 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
389 };
390 return do_zpzz_ool(s, a, fns[a->esz]);
391 }
392
393 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
394 {
395 static gen_helper_gvec_4 * const fns[4] = {
396 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
397 };
398 return do_zpzz_ool(s, a, fns[a->esz]);
399 }
400
401 DO_ZPZZ(SEL, sel)
402
403 #undef DO_ZPZZ
404
405 /*
406 *** SVE Integer Arithmetic - Unary Predicated Group
407 */
408
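/* Expand a predicated unary Zreg operation via an out-of-line helper. */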
409 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
410 {
411 if (fn == NULL) {
412 return false;
413 }
414 if (sve_access_check(s)) {
415 unsigned vsz = vec_full_reg_size(s);
416 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
417 vec_full_reg_offset(s, a->rn),
418 pred_full_reg_offset(s, a->pg),
419 vsz, vsz, 0, fn);
420 }
421 return true;
422 }
423
424 #define DO_ZPZ(NAME, name) \
425 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
426 { \
427 static gen_helper_gvec_3 * const fns[4] = { \
428 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
429 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
430 }; \
431 return do_zpz_ool(s, a, fns[a->esz]); \
432 }
433
434 DO_ZPZ(CLS, cls)
435 DO_ZPZ(CLZ, clz)
436 DO_ZPZ(CNT_zpz, cnt_zpz)
437 DO_ZPZ(CNOT, cnot)
438 DO_ZPZ(NOT_zpz, not_zpz)
439 DO_ZPZ(ABS, abs)
440 DO_ZPZ(NEG, neg)
441
442 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
443 {
444 static gen_helper_gvec_3 * const fns[4] = {
445 NULL,
446 gen_helper_sve_fabs_h,
447 gen_helper_sve_fabs_s,
448 gen_helper_sve_fabs_d
449 };
450 return do_zpz_ool(s, a, fns[a->esz]);
451 }
452
453 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
454 {
455 static gen_helper_gvec_3 * const fns[4] = {
456 NULL,
457 gen_helper_sve_fneg_h,
458 gen_helper_sve_fneg_s,
459 gen_helper_sve_fneg_d
460 };
461 return do_zpz_ool(s, a, fns[a->esz]);
462 }
463
464 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
465 {
466 static gen_helper_gvec_3 * const fns[4] = {
467 NULL,
468 gen_helper_sve_sxtb_h,
469 gen_helper_sve_sxtb_s,
470 gen_helper_sve_sxtb_d
471 };
472 return do_zpz_ool(s, a, fns[a->esz]);
473 }
474
475 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
476 {
477 static gen_helper_gvec_3 * const fns[4] = {
478 NULL,
479 gen_helper_sve_uxtb_h,
480 gen_helper_sve_uxtb_s,
481 gen_helper_sve_uxtb_d
482 };
483 return do_zpz_ool(s, a, fns[a->esz]);
484 }
485
486 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
487 {
488 static gen_helper_gvec_3 * const fns[4] = {
489 NULL, NULL,
490 gen_helper_sve_sxth_s,
491 gen_helper_sve_sxth_d
492 };
493 return do_zpz_ool(s, a, fns[a->esz]);
494 }
495
496 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
497 {
498 static gen_helper_gvec_3 * const fns[4] = {
499 NULL, NULL,
500 gen_helper_sve_uxth_s,
501 gen_helper_sve_uxth_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504 }
505
506 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
507 {
508 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
509 }
510
511 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
512 {
513 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
514 }
515
516 #undef DO_ZPZ
517
518 /*
519 *** SVE Integer Reduction Group
520 */
521
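/* Reduce Zn, governed by Pg, to a scalar; write the result to the FP register Vd. */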
522 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
523 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
524 gen_helper_gvec_reduc *fn)
525 {
526 unsigned vsz = vec_full_reg_size(s);
527 TCGv_ptr t_zn, t_pg;
528 TCGv_i32 desc;
529 TCGv_i64 temp;
530
531 if (fn == NULL) {
532 return false;
533 }
534 if (!sve_access_check(s)) {
535 return true;
536 }
537
538 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
539 temp = tcg_temp_new_i64();
540 t_zn = tcg_temp_new_ptr();
541 t_pg = tcg_temp_new_ptr();
542
543 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
544 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
545 fn(temp, t_zn, t_pg, desc);
546 tcg_temp_free_ptr(t_zn);
547 tcg_temp_free_ptr(t_pg);
548 tcg_temp_free_i32(desc);
549
550 write_fp_dreg(s, a->rd, temp);
551 tcg_temp_free_i64(temp);
552 return true;
553 }
554
555 #define DO_VPZ(NAME, name) \
556 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
557 { \
558 static gen_helper_gvec_reduc * const fns[4] = { \
559 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
560 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
561 }; \
562 return do_vpz_ool(s, a, fns[a->esz]); \
563 }
564
565 DO_VPZ(ORV, orv)
566 DO_VPZ(ANDV, andv)
567 DO_VPZ(EORV, eorv)
568
569 DO_VPZ(UADDV, uaddv)
570 DO_VPZ(SMAXV, smaxv)
571 DO_VPZ(UMAXV, umaxv)
572 DO_VPZ(SMINV, sminv)
573 DO_VPZ(UMINV, uminv)
574
575 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
576 {
577 static gen_helper_gvec_reduc * const fns[4] = {
578 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
579 gen_helper_sve_saddv_s, NULL
580 };
581 return do_vpz_ool(s, a, fns[a->esz]);
582 }
583
584 #undef DO_VPZ
585
586 /*
587 *** SVE Shift by Immediate - Predicated Group
588 */
589
590 /* Store zero into every active element of Zd. We will use this for two-
591 * and three-operand predicated instructions for which logic dictates a
592 * zero result.
593 */
594 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
595 {
596 static gen_helper_gvec_2 * const fns[4] = {
597 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
598 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
599 };
600 if (sve_access_check(s)) {
601 unsigned vsz = vec_full_reg_size(s);
602 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
603 pred_full_reg_offset(s, pg),
604 vsz, vsz, 0, fns[esz]);
605 }
606 return true;
607 }
608
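/* Expand a predicated shift by immediate, passing the immediate via simd_data. */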
609 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
610 gen_helper_gvec_3 *fn)
611 {
612 if (sve_access_check(s)) {
613 unsigned vsz = vec_full_reg_size(s);
614 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
615 vec_full_reg_offset(s, a->rn),
616 pred_full_reg_offset(s, a->pg),
617 vsz, vsz, a->imm, fn);
618 }
619 return true;
620 }
621
622 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
623 {
624 static gen_helper_gvec_3 * const fns[4] = {
625 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
626 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
627 };
628 if (a->esz < 0) {
629 /* Invalid tsz encoding -- see tszimm_esz. */
630 return false;
631 }
632 /* Shift by element size is architecturally valid. For
633 arithmetic right-shift, it's the same as by one less. */
634 a->imm = MIN(a->imm, (8 << a->esz) - 1);
635 return do_zpzi_ool(s, a, fns[a->esz]);
636 }
637
638 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
639 {
640 static gen_helper_gvec_3 * const fns[4] = {
641 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
642 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
643 };
644 if (a->esz < 0) {
645 return false;
646 }
647 /* Shift by element size is architecturally valid.
648 For logical shifts, it is a zeroing operation. */
649 if (a->imm >= (8 << a->esz)) {
650 return do_clr_zp(s, a->rd, a->pg, a->esz);
651 } else {
652 return do_zpzi_ool(s, a, fns[a->esz]);
653 }
654 }
655
656 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
657 {
658 static gen_helper_gvec_3 * const fns[4] = {
659 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
660 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
661 };
662 if (a->esz < 0) {
663 return false;
664 }
665 /* Shift by element size is architecturally valid.
666 For logical shifts, it is a zeroing operation. */
667 if (a->imm >= (8 << a->esz)) {
668 return do_clr_zp(s, a->rd, a->pg, a->esz);
669 } else {
670 return do_zpzi_ool(s, a, fns[a->esz]);
671 }
672 }
673
674 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
675 {
676 static gen_helper_gvec_3 * const fns[4] = {
677 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
678 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
679 };
680 if (a->esz < 0) {
681 return false;
682 }
683 /* Shift by element size is architecturally valid. For arithmetic
684 right shift for division, it is a zeroing operation. */
685 if (a->imm >= (8 << a->esz)) {
686 return do_clr_zp(s, a->rd, a->pg, a->esz);
687 } else {
688 return do_zpzi_ool(s, a, fns[a->esz]);
689 }
690 }
691
692 /*
693 *** SVE Bitwise Shift - Predicated Group
694 */
695
696 #define DO_ZPZW(NAME, name) \
697 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
698 uint32_t insn) \
699 { \
700 static gen_helper_gvec_4 * const fns[3] = { \
701 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
702 gen_helper_sve_##name##_zpzw_s, \
703 }; \
704 if (a->esz < 0 || a->esz >= 3) { \
705 return false; \
706 } \
707 return do_zpzz_ool(s, a, fns[a->esz]); \
708 }
709
710 DO_ZPZW(ASR, asr)
711 DO_ZPZW(LSR, lsr)
712 DO_ZPZW(LSL, lsl)
713
714 #undef DO_ZPZW
715
716 /*
717 *** SVE Bitwise Shift - Unpredicated Group
718 */
719
720 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
721 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
722 int64_t, uint32_t, uint32_t))
723 {
724 if (a->esz < 0) {
725 /* Invalid tsz encoding -- see tszimm_esz. */
726 return false;
727 }
728 if (sve_access_check(s)) {
729 unsigned vsz = vec_full_reg_size(s);
730 /* Shift by element size is architecturally valid. For
731 arithmetic right-shift, it's the same as by one less.
732 Otherwise it is a zeroing operation. */
733 if (a->imm >= 8 << a->esz) {
734 if (asr) {
735 a->imm = (8 << a->esz) - 1;
736 } else {
737 do_dupi_z(s, a->rd, 0);
738 return true;
739 }
740 }
741 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
742 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
743 }
744 return true;
745 }
746
747 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
748 {
749 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
750 }
751
752 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
753 {
754 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
755 }
756
757 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
758 {
759 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
760 }
761
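/* Expand an unpredicated operation on two Zreg inputs via an out-of-line helper. */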
762 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
763 {
764 if (fn == NULL) {
765 return false;
766 }
767 if (sve_access_check(s)) {
768 unsigned vsz = vec_full_reg_size(s);
769 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
770 vec_full_reg_offset(s, a->rn),
771 vec_full_reg_offset(s, a->rm),
772 vsz, vsz, 0, fn);
773 }
774 return true;
775 }
776
777 #define DO_ZZW(NAME, name) \
778 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
779 uint32_t insn) \
780 { \
781 static gen_helper_gvec_3 * const fns[4] = { \
782 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
783 gen_helper_sve_##name##_zzw_s, NULL \
784 }; \
785 return do_zzw_ool(s, a, fns[a->esz]); \
786 }
787
788 DO_ZZW(ASR, asr)
789 DO_ZZW(LSR, lsr)
790 DO_ZZW(LSL, lsl)
791
792 #undef DO_ZZW
793
794 /*
795 *** SVE Integer Multiply-Add Group
796 */
797
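/* Expand a predicated three-input Zreg operation (MLA/MLS) via an out-of-line helper. */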
798 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
799 gen_helper_gvec_5 *fn)
800 {
801 if (sve_access_check(s)) {
802 unsigned vsz = vec_full_reg_size(s);
803 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
804 vec_full_reg_offset(s, a->ra),
805 vec_full_reg_offset(s, a->rn),
806 vec_full_reg_offset(s, a->rm),
807 pred_full_reg_offset(s, a->pg),
808 vsz, vsz, 0, fn);
809 }
810 return true;
811 }
812
813 #define DO_ZPZZZ(NAME, name) \
814 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
815 { \
816 static gen_helper_gvec_5 * const fns[4] = { \
817 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
818 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
819 }; \
820 return do_zpzzz_ool(s, a, fns[a->esz]); \
821 }
822
823 DO_ZPZZZ(MLA, mla)
824 DO_ZPZZZ(MLS, mls)
825
826 #undef DO_ZPZZZ
827
828 /*
829 *** SVE Index Generation Group
830 */
831
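/* Expand INDEX: each element of Zd is start + element_number * incr. */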
832 static void do_index(DisasContext *s, int esz, int rd,
833 TCGv_i64 start, TCGv_i64 incr)
834 {
835 unsigned vsz = vec_full_reg_size(s);
836 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
837 TCGv_ptr t_zd = tcg_temp_new_ptr();
838
839 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
840 if (esz == 3) {
841 gen_helper_sve_index_d(t_zd, start, incr, desc);
842 } else {
843 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
844 static index_fn * const fns[3] = {
845 gen_helper_sve_index_b,
846 gen_helper_sve_index_h,
847 gen_helper_sve_index_s,
848 };
849 TCGv_i32 s32 = tcg_temp_new_i32();
850 TCGv_i32 i32 = tcg_temp_new_i32();
851
852 tcg_gen_extrl_i64_i32(s32, start);
853 tcg_gen_extrl_i64_i32(i32, incr);
854 fns[esz](t_zd, s32, i32, desc);
855
856 tcg_temp_free_i32(s32);
857 tcg_temp_free_i32(i32);
858 }
859 tcg_temp_free_ptr(t_zd);
860 tcg_temp_free_i32(desc);
861 }
862
863 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
864 {
865 if (sve_access_check(s)) {
866 TCGv_i64 start = tcg_const_i64(a->imm1);
867 TCGv_i64 incr = tcg_const_i64(a->imm2);
868 do_index(s, a->esz, a->rd, start, incr);
869 tcg_temp_free_i64(start);
870 tcg_temp_free_i64(incr);
871 }
872 return true;
873 }
874
875 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
876 {
877 if (sve_access_check(s)) {
878 TCGv_i64 start = tcg_const_i64(a->imm);
879 TCGv_i64 incr = cpu_reg(s, a->rm);
880 do_index(s, a->esz, a->rd, start, incr);
881 tcg_temp_free_i64(start);
882 }
883 return true;
884 }
885
886 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
887 {
888 if (sve_access_check(s)) {
889 TCGv_i64 start = cpu_reg(s, a->rn);
890 TCGv_i64 incr = tcg_const_i64(a->imm);
891 do_index(s, a->esz, a->rd, start, incr);
892 tcg_temp_free_i64(incr);
893 }
894 return true;
895 }
896
897 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
898 {
899 if (sve_access_check(s)) {
900 TCGv_i64 start = cpu_reg(s, a->rn);
901 TCGv_i64 incr = cpu_reg(s, a->rm);
902 do_index(s, a->esz, a->rd, start, incr);
903 }
904 return true;
905 }
906
907 /*
908 *** SVE Stack Allocation Group
909 */
910
911 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
912 {
913 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
914 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
915 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
916 return true;
917 }
918
919 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
920 {
921 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
922 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
923 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
924 return true;
925 }
926
927 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
928 {
929 TCGv_i64 reg = cpu_reg(s, a->rd);
930 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
931 return true;
932 }
933
934 /*
935 *** SVE Compute Vector Address Group
936 */
937
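/* Expand an ADR form, passing the index shift amount via simd_data. */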
938 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
939 {
940 if (sve_access_check(s)) {
941 unsigned vsz = vec_full_reg_size(s);
942 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
943 vec_full_reg_offset(s, a->rn),
944 vec_full_reg_offset(s, a->rm),
945 vsz, vsz, a->imm, fn);
946 }
947 return true;
948 }
949
950 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
951 {
952 return do_adr(s, a, gen_helper_sve_adr_p32);
953 }
954
955 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
956 {
957 return do_adr(s, a, gen_helper_sve_adr_p64);
958 }
959
960 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
961 {
962 return do_adr(s, a, gen_helper_sve_adr_s32);
963 }
964
965 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
966 {
967 return do_adr(s, a, gen_helper_sve_adr_u32);
968 }
969
970 /*
971 *** SVE Integer Misc - Unpredicated Group
972 */
973
974 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
975 {
976 static gen_helper_gvec_2 * const fns[4] = {
977 NULL,
978 gen_helper_sve_fexpa_h,
979 gen_helper_sve_fexpa_s,
980 gen_helper_sve_fexpa_d,
981 };
982 if (a->esz == 0) {
983 return false;
984 }
985 if (sve_access_check(s)) {
986 unsigned vsz = vec_full_reg_size(s);
987 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
988 vec_full_reg_offset(s, a->rn),
989 vsz, vsz, 0, fns[a->esz]);
990 }
991 return true;
992 }
993
994 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
995 {
996 static gen_helper_gvec_3 * const fns[4] = {
997 NULL,
998 gen_helper_sve_ftssel_h,
999 gen_helper_sve_ftssel_s,
1000 gen_helper_sve_ftssel_d,
1001 };
1002 if (a->esz == 0) {
1003 return false;
1004 }
1005 if (sve_access_check(s)) {
1006 unsigned vsz = vec_full_reg_size(s);
1007 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1008 vec_full_reg_offset(s, a->rn),
1009 vec_full_reg_offset(s, a->rm),
1010 vsz, vsz, 0, fns[a->esz]);
1011 }
1012 return true;
1013 }
1014
1015 /*
1016 *** SVE Predicate Logical Operations Group
1017 */
1018
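/* Expand a flag-setting predicate logical operation: perform the operation,
 * then apply PredTest to the result against the governing predicate.
 */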
1019 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1020 const GVecGen4 *gvec_op)
1021 {
1022 if (!sve_access_check(s)) {
1023 return true;
1024 }
1025
1026 unsigned psz = pred_gvec_reg_size(s);
1027 int dofs = pred_full_reg_offset(s, a->rd);
1028 int nofs = pred_full_reg_offset(s, a->rn);
1029 int mofs = pred_full_reg_offset(s, a->rm);
1030 int gofs = pred_full_reg_offset(s, a->pg);
1031
1032 if (psz == 8) {
1033 /* Do the operation and the flags generation in temps. */
1034 TCGv_i64 pd = tcg_temp_new_i64();
1035 TCGv_i64 pn = tcg_temp_new_i64();
1036 TCGv_i64 pm = tcg_temp_new_i64();
1037 TCGv_i64 pg = tcg_temp_new_i64();
1038
1039 tcg_gen_ld_i64(pn, cpu_env, nofs);
1040 tcg_gen_ld_i64(pm, cpu_env, mofs);
1041 tcg_gen_ld_i64(pg, cpu_env, gofs);
1042
1043 gvec_op->fni8(pd, pn, pm, pg);
1044 tcg_gen_st_i64(pd, cpu_env, dofs);
1045
1046 do_predtest1(pd, pg);
1047
1048 tcg_temp_free_i64(pd);
1049 tcg_temp_free_i64(pn);
1050 tcg_temp_free_i64(pm);
1051 tcg_temp_free_i64(pg);
1052 } else {
1053 /* The operation and flags generation is large. The computation
1054 * of the flags depends on the original contents of the guarding
1055 * predicate. If the destination overwrites the guarding predicate,
1056 * then the easiest way to get this right is to save a copy.
1057 */
1058 int tofs = gofs;
1059 if (a->rd == a->pg) {
1060 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1061 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1062 }
1063
1064 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1065 do_predtest(s, dofs, tofs, psz / 8);
1066 }
1067 return true;
1068 }
1069
1070 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1071 {
1072 tcg_gen_and_i64(pd, pn, pm);
1073 tcg_gen_and_i64(pd, pd, pg);
1074 }
1075
1076 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1077 TCGv_vec pm, TCGv_vec pg)
1078 {
1079 tcg_gen_and_vec(vece, pd, pn, pm);
1080 tcg_gen_and_vec(vece, pd, pd, pg);
1081 }
1082
1083 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1084 {
1085 static const GVecGen4 op = {
1086 .fni8 = gen_and_pg_i64,
1087 .fniv = gen_and_pg_vec,
1088 .fno = gen_helper_sve_and_pppp,
1089 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1090 };
1091 if (a->s) {
1092 return do_pppp_flags(s, a, &op);
1093 } else if (a->rn == a->rm) {
1094 if (a->pg == a->rn) {
1095 return do_mov_p(s, a->rd, a->rn);
1096 } else {
1097 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1098 }
1099 } else if (a->pg == a->rn || a->pg == a->rm) {
1100 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1101 } else {
1102 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1103 }
1104 }
1105
1106 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1107 {
1108 tcg_gen_andc_i64(pd, pn, pm);
1109 tcg_gen_and_i64(pd, pd, pg);
1110 }
1111
1112 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1113 TCGv_vec pm, TCGv_vec pg)
1114 {
1115 tcg_gen_andc_vec(vece, pd, pn, pm);
1116 tcg_gen_and_vec(vece, pd, pd, pg);
1117 }
1118
1119 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1120 {
1121 static const GVecGen4 op = {
1122 .fni8 = gen_bic_pg_i64,
1123 .fniv = gen_bic_pg_vec,
1124 .fno = gen_helper_sve_bic_pppp,
1125 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1126 };
1127 if (a->s) {
1128 return do_pppp_flags(s, a, &op);
1129 } else if (a->pg == a->rn) {
1130 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1131 } else {
1132 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1133 }
1134 }
1135
1136 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1137 {
1138 tcg_gen_xor_i64(pd, pn, pm);
1139 tcg_gen_and_i64(pd, pd, pg);
1140 }
1141
1142 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1143 TCGv_vec pm, TCGv_vec pg)
1144 {
1145 tcg_gen_xor_vec(vece, pd, pn, pm);
1146 tcg_gen_and_vec(vece, pd, pd, pg);
1147 }
1148
1149 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1150 {
1151 static const GVecGen4 op = {
1152 .fni8 = gen_eor_pg_i64,
1153 .fniv = gen_eor_pg_vec,
1154 .fno = gen_helper_sve_eor_pppp,
1155 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1156 };
1157 if (a->s) {
1158 return do_pppp_flags(s, a, &op);
1159 } else {
1160 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1161 }
1162 }
1163
1164 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1165 {
1166 tcg_gen_and_i64(pn, pn, pg);
1167 tcg_gen_andc_i64(pm, pm, pg);
1168 tcg_gen_or_i64(pd, pn, pm);
1169 }
1170
1171 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1172 TCGv_vec pm, TCGv_vec pg)
1173 {
1174 tcg_gen_and_vec(vece, pn, pn, pg);
1175 tcg_gen_andc_vec(vece, pm, pm, pg);
1176 tcg_gen_or_vec(vece, pd, pn, pm);
1177 }
1178
1179 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1180 {
1181 static const GVecGen4 op = {
1182 .fni8 = gen_sel_pg_i64,
1183 .fniv = gen_sel_pg_vec,
1184 .fno = gen_helper_sve_sel_pppp,
1185 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1186 };
1187 if (a->s) {
1188 return false;
1189 } else {
1190 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1191 }
1192 }
1193
1194 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1195 {
1196 tcg_gen_or_i64(pd, pn, pm);
1197 tcg_gen_and_i64(pd, pd, pg);
1198 }
1199
1200 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1201 TCGv_vec pm, TCGv_vec pg)
1202 {
1203 tcg_gen_or_vec(vece, pd, pn, pm);
1204 tcg_gen_and_vec(vece, pd, pd, pg);
1205 }
1206
1207 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1208 {
1209 static const GVecGen4 op = {
1210 .fni8 = gen_orr_pg_i64,
1211 .fniv = gen_orr_pg_vec,
1212 .fno = gen_helper_sve_orr_pppp,
1213 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1214 };
1215 if (a->s) {
1216 return do_pppp_flags(s, a, &op);
1217 } else if (a->pg == a->rn && a->rn == a->rm) {
1218 return do_mov_p(s, a->rd, a->rn);
1219 } else {
1220 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1221 }
1222 }
1223
1224 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1225 {
1226 tcg_gen_orc_i64(pd, pn, pm);
1227 tcg_gen_and_i64(pd, pd, pg);
1228 }
1229
1230 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1231 TCGv_vec pm, TCGv_vec pg)
1232 {
1233 tcg_gen_orc_vec(vece, pd, pn, pm);
1234 tcg_gen_and_vec(vece, pd, pd, pg);
1235 }
1236
1237 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1238 {
1239 static const GVecGen4 op = {
1240 .fni8 = gen_orn_pg_i64,
1241 .fniv = gen_orn_pg_vec,
1242 .fno = gen_helper_sve_orn_pppp,
1243 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1244 };
1245 if (a->s) {
1246 return do_pppp_flags(s, a, &op);
1247 } else {
1248 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1249 }
1250 }
1251
1252 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1253 {
1254 tcg_gen_or_i64(pd, pn, pm);
1255 tcg_gen_andc_i64(pd, pg, pd);
1256 }
1257
1258 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1259 TCGv_vec pm, TCGv_vec pg)
1260 {
1261 tcg_gen_or_vec(vece, pd, pn, pm);
1262 tcg_gen_andc_vec(vece, pd, pg, pd);
1263 }
1264
1265 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1266 {
1267 static const GVecGen4 op = {
1268 .fni8 = gen_nor_pg_i64,
1269 .fniv = gen_nor_pg_vec,
1270 .fno = gen_helper_sve_nor_pppp,
1271 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1272 };
1273 if (a->s) {
1274 return do_pppp_flags(s, a, &op);
1275 } else {
1276 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1277 }
1278 }
1279
1280 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1281 {
1282 tcg_gen_and_i64(pd, pn, pm);
1283 tcg_gen_andc_i64(pd, pg, pd);
1284 }
1285
1286 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1287 TCGv_vec pm, TCGv_vec pg)
1288 {
1289 tcg_gen_and_vec(vece, pd, pn, pm);
1290 tcg_gen_andc_vec(vece, pd, pg, pd);
1291 }
1292
1293 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1294 {
1295 static const GVecGen4 op = {
1296 .fni8 = gen_nand_pg_i64,
1297 .fniv = gen_nand_pg_vec,
1298 .fno = gen_helper_sve_nand_pppp,
1299 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1300 };
1301 if (a->s) {
1302 return do_pppp_flags(s, a, &op);
1303 } else {
1304 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1305 }
1306 }
1307
1308 /*
1309 *** SVE Predicate Misc Group
1310 */
1311
1312 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1313 {
1314 if (sve_access_check(s)) {
1315 int nofs = pred_full_reg_offset(s, a->rn);
1316 int gofs = pred_full_reg_offset(s, a->pg);
1317 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1318
1319 if (words == 1) {
1320 TCGv_i64 pn = tcg_temp_new_i64();
1321 TCGv_i64 pg = tcg_temp_new_i64();
1322
1323 tcg_gen_ld_i64(pn, cpu_env, nofs);
1324 tcg_gen_ld_i64(pg, cpu_env, gofs);
1325 do_predtest1(pn, pg);
1326
1327 tcg_temp_free_i64(pn);
1328 tcg_temp_free_i64(pg);
1329 } else {
1330 do_predtest(s, nofs, gofs, words);
1331 }
1332 }
1333 return true;
1334 }
1335
1336 /* See the ARM pseudocode DecodePredCount. */
1337 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1338 {
1339 unsigned elements = fullsz >> esz;
1340 unsigned bound;
1341
1342 switch (pattern) {
1343 case 0x0: /* POW2 */
1344 return pow2floor(elements);
1345 case 0x1: /* VL1 */
1346 case 0x2: /* VL2 */
1347 case 0x3: /* VL3 */
1348 case 0x4: /* VL4 */
1349 case 0x5: /* VL5 */
1350 case 0x6: /* VL6 */
1351 case 0x7: /* VL7 */
1352 case 0x8: /* VL8 */
1353 bound = pattern;
1354 break;
1355 case 0x9: /* VL16 */
1356 case 0xa: /* VL32 */
1357 case 0xb: /* VL64 */
1358 case 0xc: /* VL128 */
1359 case 0xd: /* VL256 */
1360 bound = 16 << (pattern - 9);
1361 break;
1362 case 0x1d: /* MUL4 */
1363 return elements - elements % 4;
1364 case 0x1e: /* MUL3 */
1365 return elements - elements % 3;
1366 case 0x1f: /* ALL */
1367 return elements;
1368 default: /* #uimm5 */
1369 return 0;
1370 }
1371 return elements >= bound ? bound : 0;
1372 }
1373
1374 /* This handles all of the predicate initialization instructions,
1375 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1376 * so that decode_pred_count returns 0. For SETFFR, we will have
1377 * set RD == 16 == FFR.
1378 */
1379 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1380 {
1381 if (!sve_access_check(s)) {
1382 return true;
1383 }
1384
1385 unsigned fullsz = vec_full_reg_size(s);
1386 unsigned ofs = pred_full_reg_offset(s, rd);
1387 unsigned numelem, setsz, i;
1388 uint64_t word, lastword;
1389 TCGv_i64 t;
1390
1391 numelem = decode_pred_count(fullsz, pat, esz);
1392
1393 /* Determine what we must store into each bit, and how many. */
1394 if (numelem == 0) {
1395 lastword = word = 0;
1396 setsz = fullsz;
1397 } else {
1398 setsz = numelem << esz;
1399 lastword = word = pred_esz_masks[esz];
1400 if (setsz % 64) {
1401 lastword &= ~(-1ull << (setsz % 64));
1402 }
1403 }
1404
1405 t = tcg_temp_new_i64();
1406 if (fullsz <= 64) {
1407 tcg_gen_movi_i64(t, lastword);
1408 tcg_gen_st_i64(t, cpu_env, ofs);
1409 goto done;
1410 }
1411
1412 if (word == lastword) {
1413 unsigned maxsz = size_for_gvec(fullsz / 8);
1414 unsigned oprsz = size_for_gvec(setsz / 8);
1415
1416 if (oprsz * 8 == setsz) {
1417 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1418 goto done;
1419 }
1420 if (oprsz * 8 == setsz + 8) {
1421 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1422 tcg_gen_movi_i64(t, 0);
1423 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1424 goto done;
1425 }
1426 }
1427
1428 setsz /= 8;
1429 fullsz /= 8;
1430
1431 tcg_gen_movi_i64(t, word);
1432 for (i = 0; i < setsz; i += 8) {
1433 tcg_gen_st_i64(t, cpu_env, ofs + i);
1434 }
1435 if (lastword != word) {
1436 tcg_gen_movi_i64(t, lastword);
1437 tcg_gen_st_i64(t, cpu_env, ofs + i);
1438 i += 8;
1439 }
1440 if (i < fullsz) {
1441 tcg_gen_movi_i64(t, 0);
1442 for (; i < fullsz; i += 8) {
1443 tcg_gen_st_i64(t, cpu_env, ofs + i);
1444 }
1445 }
1446
1447 done:
1448 tcg_temp_free_i64(t);
1449
1450 /* PTRUES */
1451 if (setflag) {
1452 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1453 tcg_gen_movi_i32(cpu_CF, word == 0);
1454 tcg_gen_movi_i32(cpu_VF, 0);
1455 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1456 }
1457 return true;
1458 }
1459
1460 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1461 {
1462 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1463 }
1464
1465 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1466 {
1467 /* Note pat == 31 is #all, to set all elements. */
1468 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1469 }
1470
1471 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1472 {
1473 /* Note pat == 32 is #unimp, to set no elements. */
1474 return do_predset(s, 0, a->rd, 32, false);
1475 }
1476
1477 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1478 {
1479 /* The path through do_pppp_flags is complicated enough to want to avoid
1480 * duplication. Frob the arguments into the form of a predicated AND.
1481 */
1482 arg_rprr_s alt_a = {
1483 .rd = a->rd, .pg = a->pg, .s = a->s,
1484 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1485 };
1486 return trans_AND_pppp(s, &alt_a, insn);
1487 }
1488
1489 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1490 {
1491 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1492 }
1493
1494 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1495 {
1496 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1497 }
1498
1499 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1500 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1501 TCGv_ptr, TCGv_i32))
1502 {
1503 if (!sve_access_check(s)) {
1504 return true;
1505 }
1506
1507 TCGv_ptr t_pd = tcg_temp_new_ptr();
1508 TCGv_ptr t_pg = tcg_temp_new_ptr();
1509 TCGv_i32 t;
1510 unsigned desc;
1511
1512 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1513 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1514
1515 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1516 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1517 t = tcg_const_i32(desc);
1518
1519 gen_fn(t, t_pd, t_pg, t);
1520 tcg_temp_free_ptr(t_pd);
1521 tcg_temp_free_ptr(t_pg);
1522
1523 do_pred_flags(t);
1524 tcg_temp_free_i32(t);
1525 return true;
1526 }
1527
1528 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1529 {
1530 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1531 }
1532
1533 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1534 {
1535 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1536 }
1537
1538 /*
1539 *** SVE Element Count Group
1540 */
1541
1542 /* Perform an inline saturating addition of a 32-bit value within
1543 * a 64-bit register. The second operand is known to be positive,
1544  * which halves the comparisons we must perform to bound the result.
1545 */
1546 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1547 {
1548 int64_t ibound;
1549 TCGv_i64 bound;
1550 TCGCond cond;
1551
1552 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1553 if (u) {
1554 tcg_gen_ext32u_i64(reg, reg);
1555 } else {
1556 tcg_gen_ext32s_i64(reg, reg);
1557 }
1558 if (d) {
1559 tcg_gen_sub_i64(reg, reg, val);
1560 ibound = (u ? 0 : INT32_MIN);
1561 cond = TCG_COND_LT;
1562 } else {
1563 tcg_gen_add_i64(reg, reg, val);
1564 ibound = (u ? UINT32_MAX : INT32_MAX);
1565 cond = TCG_COND_GT;
1566 }
1567 bound = tcg_const_i64(ibound);
1568 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1569 tcg_temp_free_i64(bound);
1570 }
1571
1572 /* Similarly with 64-bit values. */
1573 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1574 {
1575 TCGv_i64 t0 = tcg_temp_new_i64();
1576 TCGv_i64 t1 = tcg_temp_new_i64();
1577 TCGv_i64 t2;
1578
1579 if (u) {
1580 if (d) {
1581 tcg_gen_sub_i64(t0, reg, val);
1582 tcg_gen_movi_i64(t1, 0);
1583 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1584 } else {
1585 tcg_gen_add_i64(t0, reg, val);
1586 tcg_gen_movi_i64(t1, -1);
1587 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1588 }
1589 } else {
1590 if (d) {
1591 /* Detect signed overflow for subtraction. */
1592 tcg_gen_xor_i64(t0, reg, val);
1593 tcg_gen_sub_i64(t1, reg, val);
1594 tcg_gen_xor_i64(reg, reg, t0);
1595 tcg_gen_and_i64(t0, t0, reg);
1596
1597 /* Bound the result. */
1598 tcg_gen_movi_i64(reg, INT64_MIN);
1599 t2 = tcg_const_i64(0);
1600 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1601 } else {
1602 /* Detect signed overflow for addition. */
1603 tcg_gen_xor_i64(t0, reg, val);
1604 tcg_gen_add_i64(reg, reg, val);
1605 tcg_gen_xor_i64(t1, reg, val);
1606 tcg_gen_andc_i64(t0, t1, t0);
1607
1608 /* Bound the result. */
1609 tcg_gen_movi_i64(t1, INT64_MAX);
1610 t2 = tcg_const_i64(0);
1611 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1612 }
1613 tcg_temp_free_i64(t2);
1614 }
1615 tcg_temp_free_i64(t0);
1616 tcg_temp_free_i64(t1);
1617 }
1618
1619 /* Similarly with a vector and a scalar operand. */
1620 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1621 TCGv_i64 val, bool u, bool d)
1622 {
1623 unsigned vsz = vec_full_reg_size(s);
1624 TCGv_ptr dptr, nptr;
1625 TCGv_i32 t32, desc;
1626 TCGv_i64 t64;
1627
1628 dptr = tcg_temp_new_ptr();
1629 nptr = tcg_temp_new_ptr();
1630 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1631 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1632 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1633
1634 switch (esz) {
1635 case MO_8:
1636 t32 = tcg_temp_new_i32();
1637 tcg_gen_extrl_i64_i32(t32, val);
1638 if (d) {
1639 tcg_gen_neg_i32(t32, t32);
1640 }
1641 if (u) {
1642 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1643 } else {
1644 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1645 }
1646 tcg_temp_free_i32(t32);
1647 break;
1648
1649 case MO_16:
1650 t32 = tcg_temp_new_i32();
1651 tcg_gen_extrl_i64_i32(t32, val);
1652 if (d) {
1653 tcg_gen_neg_i32(t32, t32);
1654 }
1655 if (u) {
1656 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1657 } else {
1658 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1659 }
1660 tcg_temp_free_i32(t32);
1661 break;
1662
1663 case MO_32:
1664 t64 = tcg_temp_new_i64();
1665 if (d) {
1666 tcg_gen_neg_i64(t64, val);
1667 } else {
1668 tcg_gen_mov_i64(t64, val);
1669 }
1670 if (u) {
1671 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1672 } else {
1673 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1674 }
1675 tcg_temp_free_i64(t64);
1676 break;
1677
1678 case MO_64:
1679 if (u) {
1680 if (d) {
1681 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1682 } else {
1683 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1684 }
1685 } else if (d) {
1686 t64 = tcg_temp_new_i64();
1687 tcg_gen_neg_i64(t64, val);
1688 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1689 tcg_temp_free_i64(t64);
1690 } else {
1691 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1692 }
1693 break;
1694
1695 default:
1696 g_assert_not_reached();
1697 }
1698
1699 tcg_temp_free_ptr(dptr);
1700 tcg_temp_free_ptr(nptr);
1701 tcg_temp_free_i32(desc);
1702 }
1703
1704 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1705 {
1706 if (sve_access_check(s)) {
1707 unsigned fullsz = vec_full_reg_size(s);
1708 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1709 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1710 }
1711 return true;
1712 }
1713
1714 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1715 {
1716 if (sve_access_check(s)) {
1717 unsigned fullsz = vec_full_reg_size(s);
1718 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1719 int inc = numelem * a->imm * (a->d ? -1 : 1);
1720 TCGv_i64 reg = cpu_reg(s, a->rd);
1721
1722 tcg_gen_addi_i64(reg, reg, inc);
1723 }
1724 return true;
1725 }
1726
1727 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1728 uint32_t insn)
1729 {
1730 if (!sve_access_check(s)) {
1731 return true;
1732 }
1733
1734 unsigned fullsz = vec_full_reg_size(s);
1735 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736 int inc = numelem * a->imm;
1737 TCGv_i64 reg = cpu_reg(s, a->rd);
1738
1739 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1740 if (inc == 0) {
1741 if (a->u) {
1742 tcg_gen_ext32u_i64(reg, reg);
1743 } else {
1744 tcg_gen_ext32s_i64(reg, reg);
1745 }
1746 } else {
1747 TCGv_i64 t = tcg_const_i64(inc);
1748 do_sat_addsub_32(reg, t, a->u, a->d);
1749 tcg_temp_free_i64(t);
1750 }
1751 return true;
1752 }
1753
1754 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1755 uint32_t insn)
1756 {
1757 if (!sve_access_check(s)) {
1758 return true;
1759 }
1760
1761 unsigned fullsz = vec_full_reg_size(s);
1762 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1763 int inc = numelem * a->imm;
1764 TCGv_i64 reg = cpu_reg(s, a->rd);
1765
1766 if (inc != 0) {
1767 TCGv_i64 t = tcg_const_i64(inc);
1768 do_sat_addsub_64(reg, t, a->u, a->d);
1769 tcg_temp_free_i64(t);
1770 }
1771 return true;
1772 }
1773
1774 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1775 {
1776 if (a->esz == 0) {
1777 return false;
1778 }
1779
1780 unsigned fullsz = vec_full_reg_size(s);
1781 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1782 int inc = numelem * a->imm;
1783
1784 if (inc != 0) {
1785 if (sve_access_check(s)) {
1786 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1787 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1788 vec_full_reg_offset(s, a->rn),
1789 t, fullsz, fullsz);
1790 tcg_temp_free_i64(t);
1791 }
1792 } else {
1793 do_mov_z(s, a->rd, a->rn);
1794 }
1795 return true;
1796 }
1797
1798 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1799 uint32_t insn)
1800 {
1801 if (a->esz == 0) {
1802 return false;
1803 }
1804
1805 unsigned fullsz = vec_full_reg_size(s);
1806 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1807 int inc = numelem * a->imm;
1808
1809 if (inc != 0) {
1810 if (sve_access_check(s)) {
1811 TCGv_i64 t = tcg_const_i64(inc);
1812 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1813 tcg_temp_free_i64(t);
1814 }
1815 } else {
1816 do_mov_z(s, a->rd, a->rn);
1817 }
1818 return true;
1819 }
1820
1821 /*
1822 *** SVE Bitwise Immediate Group
1823 */
1824
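/* Expand a bitwise operation with an AArch64 bitmask (wmask) immediate. */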
1825 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1826 {
1827 uint64_t imm;
1828 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1829 extract32(a->dbm, 0, 6),
1830 extract32(a->dbm, 6, 6))) {
1831 return false;
1832 }
1833 if (sve_access_check(s)) {
1834 unsigned vsz = vec_full_reg_size(s);
1835 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1836 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1837 }
1838 return true;
1839 }
1840
1841 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1842 {
1843 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1844 }
1845
1846 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1847 {
1848 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1849 }
1850
1851 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1852 {
1853 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1854 }
1855
1856 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1857 {
1858 uint64_t imm;
1859 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1860 extract32(a->dbm, 0, 6),
1861 extract32(a->dbm, 6, 6))) {
1862 return false;
1863 }
1864 if (sve_access_check(s)) {
1865 do_dupi_z(s, a->rd, imm);
1866 }
1867 return true;
1868 }
1869
1870 /*
1871 *** SVE Integer Wide Immediate - Predicated Group
1872 */
1873
1874 /* Implement all merging copies. This is used for CPY (immediate),
1875 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1876 */
1877 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1878 TCGv_i64 val)
1879 {
1880 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1881 static gen_cpy * const fns[4] = {
1882 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1883 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1884 };
1885 unsigned vsz = vec_full_reg_size(s);
1886 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1887 TCGv_ptr t_zd = tcg_temp_new_ptr();
1888 TCGv_ptr t_zn = tcg_temp_new_ptr();
1889 TCGv_ptr t_pg = tcg_temp_new_ptr();
1890
1891 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1892 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1893 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1894
1895 fns[esz](t_zd, t_zn, t_pg, val, desc);
1896
1897 tcg_temp_free_ptr(t_zd);
1898 tcg_temp_free_ptr(t_zn);
1899 tcg_temp_free_ptr(t_pg);
1900 tcg_temp_free_i32(desc);
1901 }
1902
1903 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1904 {
1905 if (a->esz == 0) {
1906 return false;
1907 }
1908 if (sve_access_check(s)) {
1909 /* Decode the VFP immediate. */
1910 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1911 TCGv_i64 t_imm = tcg_const_i64(imm);
1912 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1913 tcg_temp_free_i64(t_imm);
1914 }
1915 return true;
1916 }
1917
1918 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1919 {
1920 if (a->esz == 0 && extract32(insn, 13, 1)) {
1921 return false;
1922 }
1923 if (sve_access_check(s)) {
1924 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1925 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1926 tcg_temp_free_i64(t_imm);
1927 }
1928 return true;
1929 }
1930
1931 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1932 {
1933 static gen_helper_gvec_2i * const fns[4] = {
1934 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1935 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1936 };
1937
1938 if (a->esz == 0 && extract32(insn, 13, 1)) {
1939 return false;
1940 }
1941 if (sve_access_check(s)) {
1942 unsigned vsz = vec_full_reg_size(s);
1943 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1944 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1945 pred_full_reg_offset(s, a->pg),
1946 t_imm, vsz, vsz, 0, fns[a->esz]);
1947 tcg_temp_free_i64(t_imm);
1948 }
1949 return true;
1950 }
1951
1952 /*
1953 *** SVE Permute Extract Group
1954 */
1955
1956 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1957 {
1958 if (!sve_access_check(s)) {
1959 return true;
1960 }
1961
1962 unsigned vsz = vec_full_reg_size(s);
1963 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1964 unsigned n_siz = vsz - n_ofs;
1965 unsigned d = vec_full_reg_offset(s, a->rd);
1966 unsigned n = vec_full_reg_offset(s, a->rn);
1967 unsigned m = vec_full_reg_offset(s, a->rm);
1968
1969 /* Use host vector move insns if we have appropriate sizes
1970 * and no unfortunate overlap.
1971 */
1972 if (m != d
1973 && n_ofs == size_for_gvec(n_ofs)
1974 && n_siz == size_for_gvec(n_siz)
1975 && (d != n || n_siz <= n_ofs)) {
1976 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1977 if (n_ofs != 0) {
1978 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1979 }
1980 } else {
1981 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1982 }
1983 return true;
1984 }
1985
1986 /*
1987 *** SVE Permute - Unpredicated Group
1988 */
1989
1990 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1991 {
1992 if (sve_access_check(s)) {
1993 unsigned vsz = vec_full_reg_size(s);
1994 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1995 vsz, vsz, cpu_reg_sp(s, a->rn));
1996 }
1997 return true;
1998 }
1999
2000 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2001 {
2002 if ((a->imm & 0x1f) == 0) {
2003 return false;
2004 }
2005 if (sve_access_check(s)) {
2006 unsigned vsz = vec_full_reg_size(s);
2007 unsigned dofs = vec_full_reg_offset(s, a->rd);
2008 unsigned esz, index;
2009
2010 esz = ctz32(a->imm);
2011 index = a->imm >> (esz + 1);
2012
2013 if ((index << esz) < vsz) {
2014 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2015 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2016 } else {
2017 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2018 }
2019 }
2020 return true;
2021 }
2022
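/* Expand INSR: insert a 64-bit value at element 0, shifting the other elements up. */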
2023 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2024 {
2025 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2026 static gen_insr * const fns[4] = {
2027 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2028 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2029 };
2030 unsigned vsz = vec_full_reg_size(s);
2031 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2032 TCGv_ptr t_zd = tcg_temp_new_ptr();
2033 TCGv_ptr t_zn = tcg_temp_new_ptr();
2034
2035 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2036 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2037
2038 fns[a->esz](t_zd, t_zn, val, desc);
2039
2040 tcg_temp_free_ptr(t_zd);
2041 tcg_temp_free_ptr(t_zn);
2042 tcg_temp_free_i32(desc);
2043 }
2044
2045 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2046 {
2047 if (sve_access_check(s)) {
2048 TCGv_i64 t = tcg_temp_new_i64();
2049 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2050 do_insr_i64(s, a, t);
2051 tcg_temp_free_i64(t);
2052 }
2053 return true;
2054 }
2055
2056 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2057 {
2058 if (sve_access_check(s)) {
2059 do_insr_i64(s, a, cpu_reg(s, a->rm));
2060 }
2061 return true;
2062 }
2063
2064 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2065 {
2066 static gen_helper_gvec_2 * const fns[4] = {
2067 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2068 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2069 };
2070
2071 if (sve_access_check(s)) {
2072 unsigned vsz = vec_full_reg_size(s);
2073 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2074 vec_full_reg_offset(s, a->rn),
2075 vsz, vsz, 0, fns[a->esz]);
2076 }
2077 return true;
2078 }
2079
2080 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2081 {
2082 static gen_helper_gvec_3 * const fns[4] = {
2083 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2084 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2085 };
2086
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vec_full_reg_offset(s, a->rm),
2092 vsz, vsz, 0, fns[a->esz]);
2093 }
2094 return true;
2095 }
2096
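/* SUNPK{LO,HI} and UUNPK{LO,HI} widen the low or high half of Zn into
 * elements of twice the size; a->u selects unsigned extension and a->h
 * selects the high half, applied below as a VL/2 byte offset into Zn.
 */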
2097 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2098 {
2099 static gen_helper_gvec_2 * const fns[4][2] = {
2100 { NULL, NULL },
2101 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2102 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2103 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2104 };
2105
2106 if (a->esz == 0) {
2107 return false;
2108 }
2109 if (sve_access_check(s)) {
2110 unsigned vsz = vec_full_reg_size(s);
2111 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2112 vec_full_reg_offset(s, a->rn)
2113 + (a->h ? vsz / 2 : 0),
2114 vsz, vsz, 0, fns[a->esz][a->u]);
2115 }
2116 return true;
2117 }
2118
2119 /*
2120 *** SVE Permute - Predicates Group
2121 */
2122
2123 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2124 gen_helper_gvec_3 *fn)
2125 {
2126 if (!sve_access_check(s)) {
2127 return true;
2128 }
2129
2130 unsigned vsz = pred_full_reg_size(s);
2131
2132 /* Predicate sizes may be smaller and cannot use simd_desc.
2133 We cannot round up, as we do elsewhere, because we need
2134 the exact size for ZIP2 and REV. We retain the style for
2135 the other helpers for consistency. */
2136 TCGv_ptr t_d = tcg_temp_new_ptr();
2137 TCGv_ptr t_n = tcg_temp_new_ptr();
2138 TCGv_ptr t_m = tcg_temp_new_ptr();
2139 TCGv_i32 t_desc;
2140 int desc;
2141
2142 desc = vsz - 2;
2143 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2144 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2145
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2148 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2149 t_desc = tcg_const_i32(desc);
2150
2151 fn(t_d, t_n, t_m, t_desc);
2152
2153 tcg_temp_free_ptr(t_d);
2154 tcg_temp_free_ptr(t_n);
2155 tcg_temp_free_ptr(t_m);
2156 tcg_temp_free_i32(t_desc);
2157 return true;
2158 }
2159
2160 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2161 gen_helper_gvec_2 *fn)
2162 {
2163 if (!sve_access_check(s)) {
2164 return true;
2165 }
2166
2167 unsigned vsz = pred_full_reg_size(s);
2168 TCGv_ptr t_d = tcg_temp_new_ptr();
2169 TCGv_ptr t_n = tcg_temp_new_ptr();
2170 TCGv_i32 t_desc;
2171 int desc;
2172
2173 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2174 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2175
2176 /* Predicate sizes may be smaller and cannot use simd_desc.
2177 We cannot round up, as we do elsewhere, because we need
2178 the exact size for ZIP2 and REV. We retain the style for
2179 the other helpers for consistency. */
2180
2181 desc = vsz - 2;
2182 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2183 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2184 t_desc = tcg_const_i32(desc);
2185
2186 fn(t_d, t_n, t_desc);
2187
2188 tcg_temp_free_i32(t_desc);
2189 tcg_temp_free_ptr(t_d);
2190 tcg_temp_free_ptr(t_n);
2191 return true;
2192 }
2193
2194 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195 {
2196 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2197 }
2198
2199 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200 {
2201 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2202 }
2203
2204 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205 {
2206 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2207 }
2208
2209 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2210 {
2211 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2212 }
2213
2214 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2215 {
2216 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2217 }
2218
2219 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2220 {
2221 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2222 }
2223
2224 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2225 {
2226 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2227 }
2228
2229 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2230 {
2231 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2232 }
2233
2234 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2235 {
2236 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2237 }
2238
2239 /*
2240 *** SVE Permute - Interleaving Group
2241 */
2242
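/* ZIP1 interleaves elements from the low halves of Zn and Zm; ZIP2 from
 * the high halves, selected here by offsetting both sources by VL/2 bytes.
 */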
2243 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2244 {
2245 static gen_helper_gvec_3 * const fns[4] = {
2246 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2247 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2248 };
2249
2250 if (sve_access_check(s)) {
2251 unsigned vsz = vec_full_reg_size(s);
2252 unsigned high_ofs = high ? vsz / 2 : 0;
2253 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2254 vec_full_reg_offset(s, a->rn) + high_ofs,
2255 vec_full_reg_offset(s, a->rm) + high_ofs,
2256 vsz, vsz, 0, fns[a->esz]);
2257 }
2258 return true;
2259 }
2260
2261 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2262 gen_helper_gvec_3 *fn)
2263 {
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
2266 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2267 vec_full_reg_offset(s, a->rn),
2268 vec_full_reg_offset(s, a->rm),
2269 vsz, vsz, data, fn);
2270 }
2271 return true;
2272 }
2273
2274 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2275 {
2276 return do_zip(s, a, false);
2277 }
2278
2279 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280 {
2281 return do_zip(s, a, true);
2282 }
2283
2284 static gen_helper_gvec_3 * const uzp_fns[4] = {
2285 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2286 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2287 };
2288
2289 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2290 {
2291 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2292 }
2293
2294 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295 {
2296 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2297 }
2298
2299 static gen_helper_gvec_3 * const trn_fns[4] = {
2300 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2301 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2302 };
2303
2304 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2305 {
2306 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2307 }
2308
2309 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2310 {
2311 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2312 }
2313
2314 /*
2315 *** SVE Permute Vector - Predicated Group
2316 */
2317
2318 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2319 {
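/* COMPACT is only defined for word and doubleword elements. */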
2320 static gen_helper_gvec_3 * const fns[4] = {
2321 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2322 };
2323 return do_zpz_ool(s, a, fns[a->esz]);
2324 }
2325
2326 /* Call the helper that computes the ARM LastActiveElement pseudocode
2327 * function, scaled by the element size. This includes the not found
2328 * indication; e.g. not found for esz=3 is -8.
2329 */
2330 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2331 {
2332 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2333 * round up, as we do elsewhere, because we need the exact size.
2334 */
2335 TCGv_ptr t_p = tcg_temp_new_ptr();
2336 TCGv_i32 t_desc;
2337 unsigned vsz = pred_full_reg_size(s);
2338 unsigned desc;
2339
2340 desc = vsz - 2;
2341 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2342
2343 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2344 t_desc = tcg_const_i32(desc);
2345
2346 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2347
2348 tcg_temp_free_i32(t_desc);
2349 tcg_temp_free_ptr(t_p);
2350 }
2351
2352 /* Increment LAST to the offset of the next element in the vector,
2353 * wrapping around to 0.
2354 */
2355 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2356 {
2357 unsigned vsz = vec_full_reg_size(s);
2358
2359 tcg_gen_addi_i32(last, last, 1 << esz);
2360 if (is_power_of_2(vsz)) {
2361 tcg_gen_andi_i32(last, last, vsz - 1);
2362 } else {
2363 TCGv_i32 max = tcg_const_i32(vsz);
2364 TCGv_i32 zero = tcg_const_i32(0);
2365 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2366 tcg_temp_free_i32(max);
2367 tcg_temp_free_i32(zero);
2368 }
2369 }
2370
2371 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2372 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 {
2374 unsigned vsz = vec_full_reg_size(s);
2375
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2384 }
2385 }
2386
2387 /* Load an unsigned element of ESZ from BASE+OFS. */
2388 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2389 {
2390 TCGv_i64 r = tcg_temp_new_i64();
2391
2392 switch (esz) {
2393 case 0:
2394 tcg_gen_ld8u_i64(r, base, ofs);
2395 break;
2396 case 1:
2397 tcg_gen_ld16u_i64(r, base, ofs);
2398 break;
2399 case 2:
2400 tcg_gen_ld32u_i64(r, base, ofs);
2401 break;
2402 case 3:
2403 tcg_gen_ld_i64(r, base, ofs);
2404 break;
2405 default:
2406 g_assert_not_reached();
2407 }
2408 return r;
2409 }
2410
2411 /* Load an unsigned element of ESZ from RM[LAST]. */
2412 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2413 int rm, int esz)
2414 {
2415 TCGv_ptr p = tcg_temp_new_ptr();
2416 TCGv_i64 r;
2417
2418 /* Convert the offset within the vector into an offset within ENV.
2419 * The final adjustment for the vector register base
2420 * is added via a constant offset to the load.
2421 */
2422 #ifdef HOST_WORDS_BIGENDIAN
2423 /* Adjust for element ordering. See vec_reg_offset. */
2424 if (esz < 3) {
2425 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2426 }
2427 #endif
2428 tcg_gen_ext_i32_ptr(p, last);
2429 tcg_gen_add_ptr(p, p, cpu_env);
2430
2431 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2432 tcg_temp_free_ptr(p);
2433
2434 return r;
2435 }
2436
2437 /* Compute CLAST for a Zreg. */
2438 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2439 {
2440 TCGv_i32 last;
2441 TCGLabel *over;
2442 TCGv_i64 ele;
2443 unsigned vsz, esz = a->esz;
2444
2445 if (!sve_access_check(s)) {
2446 return true;
2447 }
2448
2449 last = tcg_temp_local_new_i32();
2450 over = gen_new_label();
2451
2452 find_last_active(s, last, esz, a->pg);
2453
2454 /* There is of course no movcond for a 2048-bit vector,
2455 * so we must branch over the actual store.
2456 */
2457 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2458
2459 if (!before) {
2460 incr_last_active(s, last, esz);
2461 }
2462
2463 ele = load_last_active(s, last, a->rm, esz);
2464 tcg_temp_free_i32(last);
2465
2466 vsz = vec_full_reg_size(s);
2467 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2468 tcg_temp_free_i64(ele);
2469
2470 /* If this insn used MOVPRFX, we may need a second move. */
2471 if (a->rd != a->rn) {
2472 TCGLabel *done = gen_new_label();
2473 tcg_gen_br(done);
2474
2475 gen_set_label(over);
2476 do_mov_z(s, a->rd, a->rn);
2477
2478 gen_set_label(done);
2479 } else {
2480 gen_set_label(over);
2481 }
2482 return true;
2483 }
2484
2485 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2486 {
2487 return do_clast_vector(s, a, false);
2488 }
2489
2490 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2491 {
2492 return do_clast_vector(s, a, true);
2493 }
2494
2495 /* Compute CLAST for a scalar. */
2496 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2497 bool before, TCGv_i64 reg_val)
2498 {
2499 TCGv_i32 last = tcg_temp_new_i32();
2500 TCGv_i64 ele, cmp, zero;
2501
2502 find_last_active(s, last, esz, pg);
2503
2504 /* Extend the original value of last prior to incrementing. */
2505 cmp = tcg_temp_new_i64();
2506 tcg_gen_ext_i32_i64(cmp, last);
2507
2508 if (!before) {
2509 incr_last_active(s, last, esz);
2510 }
2511
2512 /* The conceit here is that while last < 0 indicates not found, after
2513 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2514 * from which we can load garbage. We then discard the garbage with
2515 * a conditional move.
2516 */
2517 ele = load_last_active(s, last, rm, esz);
2518 tcg_temp_free_i32(last);
2519
2520 zero = tcg_const_i64(0);
2521 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2522
2523 tcg_temp_free_i64(zero);
2524 tcg_temp_free_i64(cmp);
2525 tcg_temp_free_i64(ele);
2526 }
2527
2528 /* Compute CLAST for a Vreg. */
2529 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2530 {
2531 if (sve_access_check(s)) {
2532 int esz = a->esz;
2533 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2534 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2535
2536 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2537 write_fp_dreg(s, a->rd, reg);
2538 tcg_temp_free_i64(reg);
2539 }
2540 return true;
2541 }
2542
2543 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2544 {
2545 return do_clast_fp(s, a, false);
2546 }
2547
2548 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2549 {
2550 return do_clast_fp(s, a, true);
2551 }
2552
2553 /* Compute CLAST for a Xreg. */
2554 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2555 {
2556 TCGv_i64 reg;
2557
2558 if (!sve_access_check(s)) {
2559 return true;
2560 }
2561
2562 reg = cpu_reg(s, a->rd);
2563 switch (a->esz) {
2564 case 0:
2565 tcg_gen_ext8u_i64(reg, reg);
2566 break;
2567 case 1:
2568 tcg_gen_ext16u_i64(reg, reg);
2569 break;
2570 case 2:
2571 tcg_gen_ext32u_i64(reg, reg);
2572 break;
2573 case 3:
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2578
2579 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2580 return true;
2581 }
2582
2583 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584 {
2585 return do_clast_general(s, a, false);
2586 }
2587
2588 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589 {
2590 return do_clast_general(s, a, true);
2591 }
2592
2593 /* Compute LAST for a scalar. */
2594 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2595 int pg, int rm, bool before)
2596 {
2597 TCGv_i32 last = tcg_temp_new_i32();
2598 TCGv_i64 ret;
2599
2600 find_last_active(s, last, esz, pg);
2601 if (before) {
2602 wrap_last_active(s, last, esz);
2603 } else {
2604 incr_last_active(s, last, esz);
2605 }
2606
2607 ret = load_last_active(s, last, rm, esz);
2608 tcg_temp_free_i32(last);
2609 return ret;
2610 }
2611
2612 /* Compute LAST for a Vreg. */
2613 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2614 {
2615 if (sve_access_check(s)) {
2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2617 write_fp_dreg(s, a->rd, val);
2618 tcg_temp_free_i64(val);
2619 }
2620 return true;
2621 }
2622
2623 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624 {
2625 return do_last_fp(s, a, false);
2626 }
2627
2628 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629 {
2630 return do_last_fp(s, a, true);
2631 }
2632
2633 /* Compute LAST for a Xreg. */
2634 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2635 {
2636 if (sve_access_check(s)) {
2637 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2638 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2639 tcg_temp_free_i64(val);
2640 }
2641 return true;
2642 }
2643
2644 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645 {
2646 return do_last_general(s, a, false);
2647 }
2648
2649 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2650 {
2651 return do_last_general(s, a, true);
2652 }
2653
2654 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2655 {
2656 if (sve_access_check(s)) {
2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2658 }
2659 return true;
2660 }
2661
2662 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664 if (sve_access_check(s)) {
2665 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2666 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2667 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2668 tcg_temp_free_i64(t);
2669 }
2670 return true;
2671 }
2672
2673 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2674 {
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 gen_helper_sve_revb_h,
2678 gen_helper_sve_revb_s,
2679 gen_helper_sve_revb_d,
2680 };
2681 return do_zpz_ool(s, a, fns[a->esz]);
2682 }
2683
2684 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 NULL,
2688 NULL,
2689 gen_helper_sve_revh_s,
2690 gen_helper_sve_revh_d,
2691 };
2692 return do_zpz_ool(s, a, fns[a->esz]);
2693 }
2694
2695 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2696 {
2697 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2698 }
2699
2700 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701 {
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 gen_helper_sve_rbit_b,
2704 gen_helper_sve_rbit_h,
2705 gen_helper_sve_rbit_s,
2706 gen_helper_sve_rbit_d,
2707 };
2708 return do_zpz_ool(s, a, fns[a->esz]);
2709 }
2710
2711 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2712 {
2713 if (sve_access_check(s)) {
2714 unsigned vsz = vec_full_reg_size(s);
2715 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2716 vec_full_reg_offset(s, a->rn),
2717 vec_full_reg_offset(s, a->rm),
2718 pred_full_reg_offset(s, a->pg),
2719 vsz, vsz, a->esz, gen_helper_sve_splice);
2720 }
2721 return true;
2722 }
2723
2724 /*
2725 *** SVE Integer Compare - Vectors Group
2726 */
2727
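/* Expand a predicated vector-vs-vector compare that also sets the flags.
 * Note that T does double duty: it carries the simd descriptor into the
 * helper and returns the predicate-test result, which do_pred_flags then
 * transfers to the NZCV flags.
 */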
2728 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2729 gen_helper_gvec_flags_4 *gen_fn)
2730 {
2731 TCGv_ptr pd, zn, zm, pg;
2732 unsigned vsz;
2733 TCGv_i32 t;
2734
2735 if (gen_fn == NULL) {
2736 return false;
2737 }
2738 if (!sve_access_check(s)) {
2739 return true;
2740 }
2741
2742 vsz = vec_full_reg_size(s);
2743 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2744 pd = tcg_temp_new_ptr();
2745 zn = tcg_temp_new_ptr();
2746 zm = tcg_temp_new_ptr();
2747 pg = tcg_temp_new_ptr();
2748
2749 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2750 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2751 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2752 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2753
2754 gen_fn(t, pd, zn, zm, pg, t);
2755
2756 tcg_temp_free_ptr(pd);
2757 tcg_temp_free_ptr(zn);
2758 tcg_temp_free_ptr(zm);
2759 tcg_temp_free_ptr(pg);
2760
2761 do_pred_flags(t);
2762
2763 tcg_temp_free_i32(t);
2764 return true;
2765 }
2766
2767 #define DO_PPZZ(NAME, name) \
2768 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2769 uint32_t insn) \
2770 { \
2771 static gen_helper_gvec_flags_4 * const fns[4] = { \
2772 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2773 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2774 }; \
2775 return do_ppzz_flags(s, a, fns[a->esz]); \
2776 }
2777
2778 DO_PPZZ(CMPEQ, cmpeq)
2779 DO_PPZZ(CMPNE, cmpne)
2780 DO_PPZZ(CMPGT, cmpgt)
2781 DO_PPZZ(CMPGE, cmpge)
2782 DO_PPZZ(CMPHI, cmphi)
2783 DO_PPZZ(CMPHS, cmphs)
2784
2785 #undef DO_PPZZ
2786
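/* The wide forms compare each element of Zn against the 64-bit element of
 * Zm occupying the same 64-bit lane, so the fns table has no .d entry.
 */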
2787 #define DO_PPZW(NAME, name) \
2788 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2789 uint32_t insn) \
2790 { \
2791 static gen_helper_gvec_flags_4 * const fns[4] = { \
2792 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2793 gen_helper_sve_##name##_ppzw_s, NULL \
2794 }; \
2795 return do_ppzz_flags(s, a, fns[a->esz]); \
2796 }
2797
2798 DO_PPZW(CMPEQ, cmpeq)
2799 DO_PPZW(CMPNE, cmpne)
2800 DO_PPZW(CMPGT, cmpgt)
2801 DO_PPZW(CMPGE, cmpge)
2802 DO_PPZW(CMPHI, cmphi)
2803 DO_PPZW(CMPHS, cmphs)
2804 DO_PPZW(CMPLT, cmplt)
2805 DO_PPZW(CMPLE, cmple)
2806 DO_PPZW(CMPLO, cmplo)
2807 DO_PPZW(CMPLS, cmpls)
2808
2809 #undef DO_PPZW
2810
2811 /*
2812 *** SVE Integer Compare - Immediate Groups
2813 */
2814
2815 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2816 gen_helper_gvec_flags_3 *gen_fn)
2817 {
2818 TCGv_ptr pd, zn, pg;
2819 unsigned vsz;
2820 TCGv_i32 t;
2821
2822 if (gen_fn == NULL) {
2823 return false;
2824 }
2825 if (!sve_access_check(s)) {
2826 return true;
2827 }
2828
2829 vsz = vec_full_reg_size(s);
2830 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2831 pd = tcg_temp_new_ptr();
2832 zn = tcg_temp_new_ptr();
2833 pg = tcg_temp_new_ptr();
2834
2835 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2836 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2837 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2838
2839 gen_fn(t, pd, zn, pg, t);
2840
2841 tcg_temp_free_ptr(pd);
2842 tcg_temp_free_ptr(zn);
2843 tcg_temp_free_ptr(pg);
2844
2845 do_pred_flags(t);
2846
2847 tcg_temp_free_i32(t);
2848 return true;
2849 }
2850
2851 #define DO_PPZI(NAME, name) \
2852 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2853 uint32_t insn) \
2854 { \
2855 static gen_helper_gvec_flags_3 * const fns[4] = { \
2856 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2857 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2858 }; \
2859 return do_ppzi_flags(s, a, fns[a->esz]); \
2860 }
2861
2862 DO_PPZI(CMPEQ, cmpeq)
2863 DO_PPZI(CMPNE, cmpne)
2864 DO_PPZI(CMPGT, cmpgt)
2865 DO_PPZI(CMPGE, cmpge)
2866 DO_PPZI(CMPHI, cmphi)
2867 DO_PPZI(CMPHS, cmphs)
2868 DO_PPZI(CMPLT, cmplt)
2869 DO_PPZI(CMPLE, cmple)
2870 DO_PPZI(CMPLO, cmplo)
2871 DO_PPZI(CMPLS, cmpls)
2872
2873 #undef DO_PPZI
2874
2875 /*
2876 *** SVE Partition Break Group
2877 */
2878
2879 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2880 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2881 {
2882 if (!sve_access_check(s)) {
2883 return true;
2884 }
2885
2886 unsigned vsz = pred_full_reg_size(s);
2887
2888 /* Predicate sizes may be smaller and cannot use simd_desc. */
2889 TCGv_ptr d = tcg_temp_new_ptr();
2890 TCGv_ptr n = tcg_temp_new_ptr();
2891 TCGv_ptr m = tcg_temp_new_ptr();
2892 TCGv_ptr g = tcg_temp_new_ptr();
2893 TCGv_i32 t = tcg_const_i32(vsz - 2);
2894
2895 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2896 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2897 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2898 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2899
2900 if (a->s) {
2901 fn_s(t, d, n, m, g, t);
2902 do_pred_flags(t);
2903 } else {
2904 fn(d, n, m, g, t);
2905 }
2906 tcg_temp_free_ptr(d);
2907 tcg_temp_free_ptr(n);
2908 tcg_temp_free_ptr(m);
2909 tcg_temp_free_ptr(g);
2910 tcg_temp_free_i32(t);
2911 return true;
2912 }
2913
2914 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2915 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2916 {
2917 if (!sve_access_check(s)) {
2918 return true;
2919 }
2920
2921 unsigned vsz = pred_full_reg_size(s);
2922
2923 /* Predicate sizes may be smaller and cannot use simd_desc. */
2924 TCGv_ptr d = tcg_temp_new_ptr();
2925 TCGv_ptr n = tcg_temp_new_ptr();
2926 TCGv_ptr g = tcg_temp_new_ptr();
2927 TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2932
2933 if (a->s) {
2934 fn_s(t, d, n, g, t);
2935 do_pred_flags(t);
2936 } else {
2937 fn(d, n, g, t);
2938 }
2939 tcg_temp_free_ptr(d);
2940 tcg_temp_free_ptr(n);
2941 tcg_temp_free_ptr(g);
2942 tcg_temp_free_i32(t);
2943 return true;
2944 }
2945
2946 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2947 {
2948 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2949 }
2950
2951 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2952 {
2953 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2954 }
2955
2956 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957 {
2958 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2959 }
2960
2961 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962 {
2963 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2964 }
2965
2966 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2967 {
2968 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2969 }
2970
2971 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2972 {
2973 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2974 }
2975
2976 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2977 {
2978 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2979 }
2980
2981 /*
2982 *** SVE Predicate Count Group
2983 */
2984
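/* Set VAL to the number of active elements of size ESZ in PN, as governed
 * by PG.  Small predicates (psz <= 8 bytes) are counted inline with a
 * masked popcount; larger predicates go through the helper.
 */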
2985 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2986 {
2987 unsigned psz = pred_full_reg_size(s);
2988
2989 if (psz <= 8) {
2990 uint64_t psz_mask;
2991
2992 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2993 if (pn != pg) {
2994 TCGv_i64 g = tcg_temp_new_i64();
2995 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2996 tcg_gen_and_i64(val, val, g);
2997 tcg_temp_free_i64(g);
2998 }
2999
3000 /* Reduce the pred_esz_masks value simply to reduce the
3001 * size of the code generated here.
3002 */
3003 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3004 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3005
3006 tcg_gen_ctpop_i64(val, val);
3007 } else {
3008 TCGv_ptr t_pn = tcg_temp_new_ptr();
3009 TCGv_ptr t_pg = tcg_temp_new_ptr();
3010 unsigned desc;
3011 TCGv_i32 t_desc;
3012
3013 desc = psz - 2;
3014 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3015
3016 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3017 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3018 t_desc = tcg_const_i32(desc);
3019
3020 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3021 tcg_temp_free_ptr(t_pn);
3022 tcg_temp_free_ptr(t_pg);
3023 tcg_temp_free_i32(t_desc);
3024 }
3025 }
3026
3027 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3028 {
3029 if (sve_access_check(s)) {
3030 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3031 }
3032 return true;
3033 }
3034
3035 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3036 uint32_t insn)
3037 {
3038 if (sve_access_check(s)) {
3039 TCGv_i64 reg = cpu_reg(s, a->rd);
3040 TCGv_i64 val = tcg_temp_new_i64();
3041
3042 do_cntp(s, val, a->esz, a->pg, a->pg);
3043 if (a->d) {
3044 tcg_gen_sub_i64(reg, reg, val);
3045 } else {
3046 tcg_gen_add_i64(reg, reg, val);
3047 }
3048 tcg_temp_free_i64(val);
3049 }
3050 return true;
3051 }
3052
3053 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3054 uint32_t insn)
3055 {
3056 if (a->esz == 0) {
3057 return false;
3058 }
3059 if (sve_access_check(s)) {
3060 unsigned vsz = vec_full_reg_size(s);
3061 TCGv_i64 val = tcg_temp_new_i64();
3062 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3063
3064 do_cntp(s, val, a->esz, a->pg, a->pg);
3065 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3066 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3067 }
3068 return true;
3069 }
3070
3071 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3072 uint32_t insn)
3073 {
3074 if (sve_access_check(s)) {
3075 TCGv_i64 reg = cpu_reg(s, a->rd);
3076 TCGv_i64 val = tcg_temp_new_i64();
3077
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 do_sat_addsub_32(reg, val, a->u, a->d);
3080 }
3081 return true;
3082 }
3083
3084 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3085 uint32_t insn)
3086 {
3087 if (sve_access_check(s)) {
3088 TCGv_i64 reg = cpu_reg(s, a->rd);
3089 TCGv_i64 val = tcg_temp_new_i64();
3090
3091 do_cntp(s, val, a->esz, a->pg, a->pg);
3092 do_sat_addsub_64(reg, val, a->u, a->d);
3093 }
3094 return true;
3095 }
3096
3097 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3098 uint32_t insn)
3099 {
3100 if (a->esz == 0) {
3101 return false;
3102 }
3103 if (sve_access_check(s)) {
3104 TCGv_i64 val = tcg_temp_new_i64();
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3107 }
3108 return true;
3109 }
3110
3111 /*
3112 *** SVE Integer Compare Scalars Group
3113 */
3114
3115 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3116 {
3117 if (!sve_access_check(s)) {
3118 return true;
3119 }
3120
3121 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3122 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3123 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3124 TCGv_i64 cmp = tcg_temp_new_i64();
3125
3126 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3127 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3128 tcg_temp_free_i64(cmp);
3129
3130 /* VF = !NF & !CF. */
3131 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3132 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3133
3134 /* Both NF and VF actually look at bit 31. */
3135 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3136 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3137 return true;
3138 }
3139
3140 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3141 {
3142 if (!sve_access_check(s)) {
3143 return true;
3144 }
3145
3146 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3147 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3148 TCGv_i64 t0 = tcg_temp_new_i64();
3149 TCGv_i64 t1 = tcg_temp_new_i64();
3150 TCGv_i32 t2, t3;
3151 TCGv_ptr ptr;
3152 unsigned desc, vsz = vec_full_reg_size(s);
3153 TCGCond cond;
3154
3155 if (!a->sf) {
3156 if (a->u) {
3157 tcg_gen_ext32u_i64(op0, op0);
3158 tcg_gen_ext32u_i64(op1, op1);
3159 } else {
3160 tcg_gen_ext32s_i64(op0, op0);
3161 tcg_gen_ext32s_i64(op1, op1);
3162 }
3163 }
3164
3165 /* For the helper, compress the different conditions into a computation
3166 * of how many iterations for which the condition is true.
3167 *
3168 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3169 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3170 * aren't that large, so any value >= predicate size is sufficient.
3171 */
3172 tcg_gen_sub_i64(t0, op1, op0);
3173
3174 /* t0 = MIN(op1 - op0, vsz). */
3175 tcg_gen_movi_i64(t1, vsz);
3176 tcg_gen_umin_i64(t0, t0, t1);
3177 if (a->eq) {
3178 /* Equality means one more iteration. */
3179 tcg_gen_addi_i64(t0, t0, 1);
3180 }
3181
3182 /* t0 = (condition true ? t0 : 0). */
3183 cond = (a->u
3184 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3185 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3186 tcg_gen_movi_i64(t1, 0);
3187 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3188
3189 t2 = tcg_temp_new_i32();
3190 tcg_gen_extrl_i64_i32(t2, t0);
3191 tcg_temp_free_i64(t0);
3192 tcg_temp_free_i64(t1);
3193
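/* As with the other predicate helpers, the descriptor is the predicate
 * register size in bytes minus 2, with the element size in the data field.
 */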
3194 desc = (vsz / 8) - 2;
3195 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3196 t3 = tcg_const_i32(desc);
3197
3198 ptr = tcg_temp_new_ptr();
3199 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3200
3201 gen_helper_sve_while(t2, ptr, t2, t3);
3202 do_pred_flags(t2);
3203
3204 tcg_temp_free_ptr(ptr);
3205 tcg_temp_free_i32(t2);
3206 tcg_temp_free_i32(t3);
3207 return true;
3208 }
3209
3210 /*
3211 *** SVE Integer Wide Immediate - Unpredicated Group
3212 */
3213
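/* FDUP broadcasts an 8-bit floating-point immediate, expanded to the
 * element size, to every element of Zd; dup_const replicates the expanded
 * value across a 64-bit dup unit before the gvec broadcast.
 */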
3214 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3215 {
3216 if (a->esz == 0) {
3217 return false;
3218 }
3219 if (sve_access_check(s)) {
3220 unsigned vsz = vec_full_reg_size(s);
3221 int dofs = vec_full_reg_offset(s, a->rd);
3222 uint64_t imm;
3223
3224 /* Decode the VFP immediate. */
3225 imm = vfp_expand_imm(a->esz, a->imm);
3226 imm = dup_const(a->esz, imm);
3227
3228 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3229 }
3230 return true;
3231 }
3232
3233 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3234 {
3235 if (a->esz == 0 && extract32(insn, 13, 1)) {
3236 return false;
3237 }
3238 if (sve_access_check(s)) {
3239 unsigned vsz = vec_full_reg_size(s);
3240 int dofs = vec_full_reg_offset(s, a->rd);
3241
3242 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3243 }
3244 return true;
3245 }
3246
3247 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3248 {
3249 if (a->esz == 0 && extract32(insn, 13, 1)) {
3250 return false;
3251 }
3252 if (sve_access_check(s)) {
3253 unsigned vsz = vec_full_reg_size(s);
3254 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3255 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3256 }
3257 return true;
3258 }
3259
3260 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3261 {
3262 a->imm = -a->imm;
3263 return trans_ADD_zzi(s, a, insn);
3264 }
3265
3266 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3267 {
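/* .scalar_first loads the immediate as the first source operand, so these
 * expansions compute imm - Zn as SUBR requires, rather than Zn - imm.
 */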
3268 static const GVecGen2s op[4] = {
3269 { .fni8 = tcg_gen_vec_sub8_i64,
3270 .fniv = tcg_gen_sub_vec,
3271 .fno = gen_helper_sve_subri_b,
3272 .opc = INDEX_op_sub_vec,
3273 .vece = MO_8,
3274 .scalar_first = true },
3275 { .fni8 = tcg_gen_vec_sub16_i64,
3276 .fniv = tcg_gen_sub_vec,
3277 .fno = gen_helper_sve_subri_h,
3278 .opc = INDEX_op_sub_vec,
3279 .vece = MO_16,
3280 .scalar_first = true },
3281 { .fni4 = tcg_gen_sub_i32,
3282 .fniv = tcg_gen_sub_vec,
3283 .fno = gen_helper_sve_subri_s,
3284 .opc = INDEX_op_sub_vec,
3285 .vece = MO_32,
3286 .scalar_first = true },
3287 { .fni8 = tcg_gen_sub_i64,
3288 .fniv = tcg_gen_sub_vec,
3289 .fno = gen_helper_sve_subri_d,
3290 .opc = INDEX_op_sub_vec,
3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3292 .vece = MO_64,
3293 .scalar_first = true }
3294 };
3295
3296 if (a->esz == 0 && extract32(insn, 13, 1)) {
3297 return false;
3298 }
3299 if (sve_access_check(s)) {
3300 unsigned vsz = vec_full_reg_size(s);
3301 TCGv_i64 c = tcg_const_i64(a->imm);
3302 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3303 vec_full_reg_offset(s, a->rn),
3304 vsz, vsz, c, &op[a->esz]);
3305 tcg_temp_free_i64(c);
3306 }
3307 return true;
3308 }
3309
3310 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3311 {
3312 if (sve_access_check(s)) {
3313 unsigned vsz = vec_full_reg_size(s);
3314 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3315 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3316 }
3317 return true;
3318 }
3319
3320 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3321 bool u, bool d)
3322 {
3323 if (a->esz == 0 && extract32(insn, 13, 1)) {
3324 return false;
3325 }
3326 if (sve_access_check(s)) {
3327 TCGv_i64 val = tcg_const_i64(a->imm);
3328 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3329 tcg_temp_free_i64(val);
3330 }
3331 return true;
3332 }
3333
3334 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3335 {
3336 return do_zzi_sat(s, a, insn, false, false);
3337 }
3338
3339 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3340 {
3341 return do_zzi_sat(s, a, insn, true, false);
3342 }
3343
3344 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345 {
3346 return do_zzi_sat(s, a, insn, false, true);
3347 }
3348
3349 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3350 {
3351 return do_zzi_sat(s, a, insn, true, true);
3352 }
3353
3354 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3355 {
3356 if (sve_access_check(s)) {
3357 unsigned vsz = vec_full_reg_size(s);
3358 TCGv_i64 c = tcg_const_i64(a->imm);
3359
3360 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3361 vec_full_reg_offset(s, a->rn),
3362 c, vsz, vsz, 0, fn);
3363 tcg_temp_free_i64(c);
3364 }
3365 return true;
3366 }
3367
3368 #define DO_ZZI(NAME, name) \
3369 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3370 uint32_t insn) \
3371 { \
3372 static gen_helper_gvec_2i * const fns[4] = { \
3373 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3374 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3375 }; \
3376 return do_zzi_ool(s, a, fns[a->esz]); \
3377 }
3378
3379 DO_ZZI(SMAX, smax)
3380 DO_ZZI(UMAX, umax)
3381 DO_ZZI(SMIN, smin)
3382 DO_ZZI(UMIN, umin)
3383
3384 #undef DO_ZZI
3385
3386 /*
3387 *** SVE Floating Point Arithmetic - Unpredicated Group
3388 */
3389
3390 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3391 gen_helper_gvec_3_ptr *fn)
3392 {
3393 if (fn == NULL) {
3394 return false;
3395 }
3396 if (sve_access_check(s)) {
3397 unsigned vsz = vec_full_reg_size(s);
3398 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3399 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3400 vec_full_reg_offset(s, a->rn),
3401 vec_full_reg_offset(s, a->rm),
3402 status, vsz, vsz, 0, fn);
3403 tcg_temp_free_ptr(status);
3404 }
3405 return true;
3406 }
3407
3408
3409 #define DO_FP3(NAME, name) \
3410 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3411 { \
3412 static gen_helper_gvec_3_ptr * const fns[4] = { \
3413 NULL, gen_helper_gvec_##name##_h, \
3414 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3415 }; \
3416 return do_zzz_fp(s, a, fns[a->esz]); \
3417 }
3418
3419 DO_FP3(FADD_zzz, fadd)
3420 DO_FP3(FSUB_zzz, fsub)
3421 DO_FP3(FMUL_zzz, fmul)
3422 DO_FP3(FTSMUL, ftsmul)
3423 DO_FP3(FRECPS, recps)
3424 DO_FP3(FRSQRTS, rsqrts)
3425
3426 #undef DO_FP3
3427
3428
3429 /*
3430 *** SVE Floating Point Unary Operations Predicated Group
3431 */
3432
3433 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3434 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3435 {
3436 if (sve_access_check(s)) {
3437 unsigned vsz = vec_full_reg_size(s);
3438 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3439 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3440 vec_full_reg_offset(s, rn),
3441 pred_full_reg_offset(s, pg),
3442 status, vsz, vsz, 0, fn);
3443 tcg_temp_free_ptr(status);
3444 }
3445 return true;
3446 }
3447
3448 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3449 {
3450 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3451 }
3452
3453 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3454 {
3455 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3456 }
3457
3458 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3459 {
3460 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3461 }
3462
3463 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3464 {
3465 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3466 }
3467
3468 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3469 {
3470 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3471 }
3472
3473 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3474 {
3475 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3476 }
3477
3478 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3479 {
3480 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3481 }
3482
3483 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3484 {
3485 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3486 }
3487
3488 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3489 {
3490 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3491 }
3492
3493 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3494 {
3495 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3496 }
3497
3498 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3499 {
3500 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3501 }
3502
3503 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3504 {
3505 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3506 }
3507
3508 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3509 {
3510 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3511 }
3512
3513 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3514 {
3515 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3516 }
3517
3518 /*
3519 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3520 */
3521
3522 /* Subroutine loading a vector register at VOFS of LEN bytes.
3523 * The load should begin at the address Rn + IMM.
3524 */
3525
3526 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3527 int rn, int imm)
3528 {
3529 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3530 uint32_t len_remain = len % 8;
3531 uint32_t nparts = len / 8 + ctpop8(len_remain);
3532 int midx = get_mem_index(s);
3533 TCGv_i64 addr, t0, t1;
3534
3535 addr = tcg_temp_new_i64();
3536 t0 = tcg_temp_new_i64();
3537
3538 /* Note that unpredicated load/store of vector/predicate registers
3539 * are defined as a stream of bytes, which equates to little-endian
3540 * operations on larger quantities. There is no nice way to force
3541 * a little-endian load for aarch64_be-linux-user out of line.
3542 *
3543 * Attempt to keep code expansion to a minimum by limiting the
3544 * amount of unrolling done.
3545 */
3546 if (nparts <= 4) {
3547 int i;
3548
3549 for (i = 0; i < len_align; i += 8) {
3550 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3551 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3552 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3553 }
3554 } else {
3555 TCGLabel *loop = gen_new_label();
3556 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3557
3558 gen_set_label(loop);
3559
3560 /* Minimize the number of local temps that must be re-read from
3561 * the stack each iteration. Instead, re-compute values other
3562 * than the loop counter.
3563 */
3564 tp = tcg_temp_new_ptr();
3565 tcg_gen_addi_ptr(tp, i, imm);
3566 tcg_gen_extu_ptr_i64(addr, tp);
3567 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3568
3569 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3570
3571 tcg_gen_add_ptr(tp, cpu_env, i);
3572 tcg_gen_addi_ptr(i, i, 8);
3573 tcg_gen_st_i64(t0, tp, vofs);
3574 tcg_temp_free_ptr(tp);
3575
3576 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3577 tcg_temp_free_ptr(i);
3578 }
3579
3580 /* Predicate register loads can be any multiple of 2 bytes.
3581 * Note that we still store the entire 64-bit unit into cpu_env.
3582 */
3583 if (len_remain) {
3584 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3585
3586 switch (len_remain) {
3587 case 2:
3588 case 4:
3589 case 8:
3590 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3591 break;
3592
3593 case 6:
3594 t1 = tcg_temp_new_i64();
3595 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3596 tcg_gen_addi_i64(addr, addr, 4);
3597 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3598 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3599 tcg_temp_free_i64(t1);
3600 break;
3601
3602 default:
3603 g_assert_not_reached();
3604 }
3605 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3606 }
3607 tcg_temp_free_i64(addr);
3608 tcg_temp_free_i64(t0);
3609 }
3610
3611 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3612 {
3613 if (sve_access_check(s)) {
3614 int size = vec_full_reg_size(s);
3615 int off = vec_full_reg_offset(s, a->rd);
3616 do_ldr(s, off, size, a->rn, a->imm * size);
3617 }
3618 return true;
3619 }
3620
3621 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3622 {
3623 if (sve_access_check(s)) {
3624 int size = pred_full_reg_size(s);
3625 int off = pred_full_reg_offset(s, a->rd);
3626 do_ldr(s, off, size, a->rn, a->imm * size);
3627 }
3628 return true;
3629 }
3630
3631 /*
3632 *** SVE Memory - Contiguous Load Group
3633 */
3634
3635 /* The memory mode of the dtype. */
3636 static const TCGMemOp dtype_mop[16] = {
3637 MO_UB, MO_UB, MO_UB, MO_UB,
3638 MO_SL, MO_UW, MO_UW, MO_UW,
3639 MO_SW, MO_SW, MO_UL, MO_UL,
3640 MO_SB, MO_SB, MO_SB, MO_Q
3641 };
3642
3643 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3644
3645 /* The vector element size of dtype. */
3646 static const uint8_t dtype_esz[16] = {
3647 0, 1, 2, 3,
3648 3, 1, 2, 3,
3649 3, 2, 2, 3,
3650 3, 2, 1, 3
3651 };
3652
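/* Worked example: dtype 3 is LD1B into .D elements -- dtype_mop[3] is
 * MO_UB (a byte memory access) and dtype_esz[3] is 3, so each byte is
 * zero-extended into a 64-bit vector element.
 */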
3653 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3654 gen_helper_gvec_mem *fn)
3655 {
3656 unsigned vsz = vec_full_reg_size(s);
3657 TCGv_ptr t_pg;
3658 TCGv_i32 desc;
3659
3660 /* For e.g. LD4, there are not enough arguments to pass all 4
3661 * registers as pointers, so encode the regno into the data field.
3662 * For consistency, do this even for LD1.
3663 */
3664 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3665 t_pg = tcg_temp_new_ptr();
3666
3667 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3668 fn(cpu_env, t_pg, addr, desc);
3669
3670 tcg_temp_free_ptr(t_pg);
3671 tcg_temp_free_i32(desc);
3672 }
3673
3674 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3675 TCGv_i64 addr, int dtype, int nreg)
3676 {
3677 static gen_helper_gvec_mem * const fns[16][4] = {
3678 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3679 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3680 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3681 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3682 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3683
3684 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3685 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3686 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3687 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3688 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3689
3690 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3691 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3692 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3693 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3694 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3695
3696 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3697 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3698 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3699 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3700 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3701 };
3702 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3703
3704 /* While there are holes in the table, they are not
3705 * accessible via the instruction encoding.
3706 */
3707 assert(fn != NULL);
3708 do_mem_zpa(s, zt, pg, addr, fn);
3709 }
3710
3711 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3712 {
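/* Rm == 31 would be XZR; the scalar-plus-scalar forms treat that
 * encoding as unallocated.
 */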
3713 if (a->rm == 31) {
3714 return false;
3715 }
3716 if (sve_access_check(s)) {
3717 TCGv_i64 addr = new_tmp_a64(s);
3718 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3719 (a->nreg + 1) << dtype_msz(a->dtype));
3720 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3721 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3722 }
3723 return true;
3724 }
3725
3726 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3727 {
3728 if (sve_access_check(s)) {
3729 int vsz = vec_full_reg_size(s);
3730 int elements = vsz >> dtype_esz[a->dtype];
3731 TCGv_i64 addr = new_tmp_a64(s);
3732
3733 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3734 (a->imm * elements * (a->nreg + 1))
3735 << dtype_msz(a->dtype));
3736 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3737 }
3738 return true;
3739 }
3740
3741 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3742 {
3743 static gen_helper_gvec_mem * const fns[16] = {
3744 gen_helper_sve_ldff1bb_r,
3745 gen_helper_sve_ldff1bhu_r,
3746 gen_helper_sve_ldff1bsu_r,
3747 gen_helper_sve_ldff1bdu_r,
3748
3749 gen_helper_sve_ldff1sds_r,
3750 gen_helper_sve_ldff1hh_r,
3751 gen_helper_sve_ldff1hsu_r,
3752 gen_helper_sve_ldff1hdu_r,
3753
3754 gen_helper_sve_ldff1hds_r,
3755 gen_helper_sve_ldff1hss_r,
3756 gen_helper_sve_ldff1ss_r,
3757 gen_helper_sve_ldff1sdu_r,
3758
3759 gen_helper_sve_ldff1bds_r,
3760 gen_helper_sve_ldff1bss_r,
3761 gen_helper_sve_ldff1bhs_r,
3762 gen_helper_sve_ldff1dd_r,
3763 };
3764
3765 if (sve_access_check(s)) {
3766 TCGv_i64 addr = new_tmp_a64(s);
3767 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
3768 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3769 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3770 }
3771 return true;
3772 }
3773
3774 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3775 {
3776 static gen_helper_gvec_mem * const fns[16] = {
3777 gen_helper_sve_ldnf1bb_r,
3778 gen_helper_sve_ldnf1bhu_r,
3779 gen_helper_sve_ldnf1bsu_r,
3780 gen_helper_sve_ldnf1bdu_r,
3781
3782 gen_helper_sve_ldnf1sds_r,
3783 gen_helper_sve_ldnf1hh_r,
3784 gen_helper_sve_ldnf1hsu_r,
3785 gen_helper_sve_ldnf1hdu_r,
3786
3787 gen_helper_sve_ldnf1hds_r,
3788 gen_helper_sve_ldnf1hss_r,
3789 gen_helper_sve_ldnf1ss_r,
3790 gen_helper_sve_ldnf1sdu_r,
3791
3792 gen_helper_sve_ldnf1bds_r,
3793 gen_helper_sve_ldnf1bss_r,
3794 gen_helper_sve_ldnf1bhs_r,
3795 gen_helper_sve_ldnf1dd_r,
3796 };
3797
3798 if (sve_access_check(s)) {
3799 int vsz = vec_full_reg_size(s);
3800 int elements = vsz >> dtype_esz[a->dtype];
3801 int off = (a->imm * elements) << dtype_msz(a->dtype);
3802 TCGv_i64 addr = new_tmp_a64(s);
3803
3804 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
3805 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3806 }
3807 return true;
3808 }
3809
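/* LD1RQ: load a single 16-byte quadword under control of the predicate
 * and replicate it to fill the destination vector.
 */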
3810 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
3811 {
3812 static gen_helper_gvec_mem * const fns[4] = {
3813 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
3814 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
3815 };
3816 unsigned vsz = vec_full_reg_size(s);
3817 TCGv_ptr t_pg;
3818 TCGv_i32 desc;
3819
3820 /* Load the first quadword using the normal predicated load helpers. */
3821 desc = tcg_const_i32(simd_desc(16, 16, zt));
3822 t_pg = tcg_temp_new_ptr();
3823
3824 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3825 fns[msz](cpu_env, t_pg, addr, desc);
3826
3827 tcg_temp_free_ptr(t_pg);
3828 tcg_temp_free_i32(desc);
3829
3830 /* Replicate that first quadword. */
3831 if (vsz > 16) {
3832 unsigned dofs = vec_full_reg_offset(s, zt);
3833 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
3834 }
3835 }
3836
3837 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3838 {
3839 if (a->rm == 31) {
3840 return false;
3841 }
3842 if (sve_access_check(s)) {
3843 int msz = dtype_msz(a->dtype);
3844 TCGv_i64 addr = new_tmp_a64(s);
3845 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
3846 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3847 do_ldrq(s, a->rd, a->pg, addr, msz);
3848 }
3849 return true;
3850 }
3851
3852 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3853 {
3854 if (sve_access_check(s)) {
3855 TCGv_i64 addr = new_tmp_a64(s);
3856 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
3857 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
3858 }
3859 return true;
3860 }
3861
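/* fn_single is indexed [msz][esz]; entries with esz > msz are the
 * truncating ST1 forms (e.g. ST1B of .d elements).  msz > esz is rejected
 * by the callers, so those slots are NULL.
 */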
3862 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3863 int msz, int esz, int nreg)
3864 {
3865 static gen_helper_gvec_mem * const fn_single[4][4] = {
3866 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
3867 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
3868 { NULL, gen_helper_sve_st1hh_r,
3869 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
3870 { NULL, NULL,
3871 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
3872 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
3873 };
3874 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
3875 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
3876 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
3877 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
3878 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
3879 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
3880 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
3881 };
3882 gen_helper_gvec_mem *fn;
3883
3884 if (nreg == 0) {
3885 /* ST1 */
3886 fn = fn_single[msz][esz];
3887 } else {
3888 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
3889 assert(msz == esz);
3890 fn = fn_multiple[nreg - 1][msz];
3891 }
3892 assert(fn != NULL);
3893 do_mem_zpa(s, zt, pg, addr, fn);
3894 }
3895
3896 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
3897 {
3898 if (a->rm == 31 || a->msz > a->esz) {
3899 return false;
3900 }
3901 if (sve_access_check(s)) {
3902 TCGv_i64 addr = new_tmp_a64(s);
3903 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
3904 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3905 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
3906 }
3907 return true;
3908 }
3909
3910 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
3911 {
3912 if (a->msz > a->esz) {
3913 return false;
3914 }
3915 if (sve_access_check(s)) {
3916 int vsz = vec_full_reg_size(s);
3917 int elements = vsz >> a->esz;
3918 TCGv_i64 addr = new_tmp_a64(s);
3919
3920 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3921 (a->imm * elements * (a->nreg + 1)) << a->msz);
3922 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
3923 }
3924 return true;
3925 }