/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

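/* A worked example of the tsz:imm3 shift-immediate encoding decoded by
 * the helpers above.  The 7-bit field is tsz(4):imm3(3); the position of
 * the most significant set bit of tsz selects the element size:
 *
 *   tsz = 0001 -> esz = 0 (bytes)        tsz = 001x -> esz = 1 (halfwords)
 *   tsz = 01xx -> esz = 2 (words)        tsz = 1xxx -> esz = 3 (doublewords)
 *   tsz = 0000 -> clz32(0) = 32, so tszimm_esz returns -1 (unallocated).
 *
 * A right-shift amount is 2 * esize - x: for esz = 0, x runs from 8 to 15
 * and tszimm_shr yields shifts 8 down to 1.  A left-shift amount is
 * x - esize: the same x range yields shifts 0 to 7.
 */
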
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

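/* For example, a 4-byte predicate (VL = 256 bits) is padded to the
 * minimum 8-byte gvec operand, while a 10-byte predicate (VL = 640 bits)
 * rounds up to 16 bytes; only the padding above the true predicate size
 * is required to remain zero.
 */
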
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

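/* The predtest helpers are assumed to return NZCV packed into a single
 * word in the layout consumed above: N in bit 31, C in bit 0, and bit 1
 * holding the inverse of Z (cpu_ZF reads as "Z is clear when nonzero").
 * V is always zero for PredTest.
 */
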
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

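/* One predicate bit corresponds to one byte of the vector, so an element
 * of size 1 << esz bytes is governed by every (1 << esz)-th bit: e.g. for
 * halfwords (esz = 1) the active bits are 0, 2, 4, ..., giving the
 * 0x5555... mask above.
 */
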
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

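/* A NULL entry in the fns[] tables below marks an element size for which
 * the instruction has no allocated encoding; do_zpzz_ool turns that into
 * "return false", which the decoder treats as an unallocated encoding.
 */
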
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3 * const fns[4] = {                   \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_zpz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

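/* write_fp_dreg is expected to store the 64-bit reduction result into
 * the low doubleword of Vd and, per the architecture's rules for scalar
 * writes, zero the remaining bits of the full vector register.
 */
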
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_reduc * const fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    return do_vpz_ool(s, a, fns[a->esz]);                         \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

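/* The "wide" shift variants below shift b/h/s elements by the amount in
 * the corresponding 64-bit element of Zm; esz == 3 is excluded because a
 * doubleword shift by a doubleword is simply the ordinary zpzz form.
 */
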
DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

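/* For example, with byte elements (esz = 0) a decoded shift of 8 is
 * possible for the right shifts: ASR clamps it to 7 (identical result,
 * since the sign fills every bit), while LSR replaces the destination
 * with zero via do_dupi_z.  An LSL immediate decodes to 0..esize-1 and
 * never takes the zeroing path.
 */
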
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_5 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpzzz_ool(s, a, fns[a->esz]);                         \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

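/* In do_pppp_flags, psz == 8 means the (padded) predicate fits in one
 * 64-bit word, i.e. VL <= 512 bits, so the operation and the PredTest
 * flags can be computed inline in i64 temporaries without a helper.
 */
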
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

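/* A worked example: with a 256-bit vector (fullsz = 32) and word
 * elements (esz = 2) there are 8 elements.  POW2 yields 8, VL7 yields 7,
 * VL16 yields 0 (only 8 elements are available), MUL3 yields 6, and ALL
 * yields 8.
 */
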
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

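/* Continuing the example above: PTRUE with POW2 on a 256-bit vector and
 * esz = 2 sets numelem = 8 word elements, so setsz = 32 predicate bits;
 * word = pred_esz_masks[2] = 0x1111..., and since setsz % 64 != 0,
 * lastword keeps only the low 32 bits of that mask.  fullsz (32) <= 64,
 * so the single-store fast path applies.
 */
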
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

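/* Predicate registers are a multiple of 2 bytes (VL is a multiple of
 * 128 bits), so len_remain can be 2, 4 or 6 for LDR (predicate); the
 * 6-byte tail is assembled from a 4-byte and a 2-byte little-endian
 * load deposited into one 64-bit store.  Vector registers are a
 * multiple of 16 bytes, so for LDR (vector) len_remain is always 0.
 */
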
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}