]> git.proxmox.com Git - mirror_qemu.git/blob - target/arm/translate-sve.c
target/arm: Implement SVE Integer Arithmetic - Unpredicated Group
[mirror_qemu.git] / target / arm / translate-sve.c
1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35
36 /*
37 * Helpers for extracting complex instruction fields.
38 */
39
/* See e.g. ASR (immediate, predicated).
 * The element size is encoded as the position of the most significant
 * set bit of tsz, the field above imm3.  Returns -1 for an unallocated
 * encoding (tsz == 0); diagnose later.
 */
static int tszimm_esz(int x)
{
    int esz = -1;

    /* Discard imm3, then locate the highest set bit of tsz.  */
    for (x >>= 3; x != 0; x >>= 1) {
        esz++;
    }
    return esz;
}
48
/* See e.g. LSR (immediate, predicated): the shift count is stored
 * as (2 * esize) - tszimm, where esize == 8 << esz.
 */
static int tszimm_shr(int x)
{
    int esz = tszimm_esz(x);

    return (16 << esz) - x;
}
53
/* See e.g. LSL (immediate, predicated): the shift count is stored
 * as tszimm - esize, where esize == 8 << esz.
 */
static int tszimm_shl(int x)
{
    int esz = tszimm_esz(x);

    return x - (8 << esz);
}
59
60 /*
61 * Include the generated decoder.
62 */
63
64 #include "decode-sve.inc.c"
65
66 /*
67 * Implement all of the translator functions referenced by the decoder.
68 */
69
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
77
/* Return the byte size of the whole predicate register, VL / 64.
 * A predicate holds one bit per vector byte, i.e. sve_len / 8 bytes.
 */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
83
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    /* Up to 8 bytes uses the minimum 8-byte unit; anything larger is
     * rounded up to a multiple of 16 bytes.
     */
    return size <= 8 ? 8 : (size + 15) & ~15;
}
100
/* Return the predicate register size rounded up for gvec use.  */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
105
106 /* Invoke a vector expander on two Zregs. */
107 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
108 int esz, int rd, int rn)
109 {
110 if (sve_access_check(s)) {
111 unsigned vsz = vec_full_reg_size(s);
112 gvec_fn(esz, vec_full_reg_offset(s, rd),
113 vec_full_reg_offset(s, rn), vsz, vsz);
114 }
115 return true;
116 }
117
118 /* Invoke a vector expander on three Zregs. */
119 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
120 int esz, int rd, int rn, int rm)
121 {
122 if (sve_access_check(s)) {
123 unsigned vsz = vec_full_reg_size(s);
124 gvec_fn(esz, vec_full_reg_offset(s, rd),
125 vec_full_reg_offset(s, rn),
126 vec_full_reg_offset(s, rm), vsz, vsz);
127 }
128 return true;
129 }
130
/* Invoke a vector move on two Zregs.  Element size is irrelevant
 * for a straight copy, hence esz 0.
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}
136
137 /* Invoke a vector expander on two Pregs. */
138 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
139 int esz, int rd, int rn)
140 {
141 if (sve_access_check(s)) {
142 unsigned psz = pred_gvec_reg_size(s);
143 gvec_fn(esz, pred_full_reg_offset(s, rd),
144 pred_full_reg_offset(s, rn), psz, psz);
145 }
146 return true;
147 }
148
149 /* Invoke a vector expander on three Pregs. */
150 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
151 int esz, int rd, int rn, int rm)
152 {
153 if (sve_access_check(s)) {
154 unsigned psz = pred_gvec_reg_size(s);
155 gvec_fn(esz, pred_full_reg_offset(s, rd),
156 pred_full_reg_offset(s, rn),
157 pred_full_reg_offset(s, rm), psz, psz);
158 }
159 return true;
160 }
161
162 /* Invoke a vector operation on four Pregs. */
163 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
164 int rd, int rn, int rm, int rg)
165 {
166 if (sve_access_check(s)) {
167 unsigned psz = pred_gvec_reg_size(s);
168 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
169 pred_full_reg_offset(s, rn),
170 pred_full_reg_offset(s, rm),
171 pred_full_reg_offset(s, rg),
172 psz, psz, gvec_op);
173 }
174 return true;
175 }
176
/* Invoke a vector move on two Pregs.  Element size is irrelevant
 * for a straight copy, hence esz 0.
 */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
182
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    /* The full helper result feeds NF directly. */
    tcg_gen_mov_i32(cpu_NF, t);
    /* Bit 1 of the result feeds ZF, bit 0 feeds CF. */
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    /* V is always clear. */
    tcg_gen_movi_i32(cpu_VF, 0);
}
191
192 /* Subroutines computing the ARM PredTest psuedofunction. */
193 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
194 {
195 TCGv_i32 t = tcg_temp_new_i32();
196
197 gen_helper_sve_predtest1(t, d, g);
198 do_pred_flags(t);
199 tcg_temp_free_i32(t);
200 }
201
/* Subroutine computing the ARM PredTest pseudofunction over a
 * multi-word predicate, via the out-of-line helper.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count into the helper and receives the
     * flags result back out.
     */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
219
/* For each element size, the bits within a predicate word that are active:
 * every bit for bytes, every 2nd bit for halfwords, every 4th bit for
 * words, every 8th bit for doublewords.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
225
226 /*
227 *** SVE Logical - Unpredicated Group
228 */
229
/* AND (vectors, unpredicated): bitwise, so element size is ignored.  */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}
234
235 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
236 {
237 if (a->rn == a->rm) { /* MOV */
238 return do_mov_z(s, a->rd, a->rn);
239 } else {
240 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
241 }
242 }
243
/* EOR (vectors, unpredicated): bitwise, so element size is ignored.  */
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}
248
/* BIC (vectors, unpredicated): Zd = Zn & ~Zm, element size ignored.  */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
253
254 /*
255 *** SVE Integer Arithmetic - Unpredicated Group
256 */
257
/* ADD (vectors, unpredicated), per-element at a->esz.  */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}
262
/* SUB (vectors, unpredicated), per-element at a->esz.  */
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}
267
/* SQADD (vectors, unpredicated): signed saturating add at a->esz.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}
272
/* SQSUB (vectors, unpredicated): signed saturating subtract at a->esz.  */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}
277
/* UQADD (vectors, unpredicated): unsigned saturating add at a->esz.  */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}
282
/* UQSUB (vectors, unpredicated): unsigned saturating subtract at a->esz.  */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
287
288 /*
289 *** SVE Integer Arithmetic - Binary Predicated Group
290 */
291
292 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
293 {
294 unsigned vsz = vec_full_reg_size(s);
295 if (fn == NULL) {
296 return false;
297 }
298 if (sve_access_check(s)) {
299 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
300 vec_full_reg_offset(s, a->rn),
301 vec_full_reg_offset(s, a->rm),
302 pred_full_reg_offset(s, a->pg),
303 vsz, vsz, 0, fn);
304 }
305 return true;
306 }
307
/* Define trans_<NAME>_zpzz, expanding a predicated two-source integer
 * operation via the out-of-line helper chosen by the element size.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

/* Bitwise logical, predicated.  */
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

/* Integer add/subtract, predicated.  */
DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

/* Min/max and absolute difference, predicated.  */
DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

/* Multiply and multiply returning high half, predicated.  */
DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

/* Shifts by vector, predicated.  */
DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
341
/* SDIV (predicated).  Only 32-bit and 64-bit element sizes exist;
 * the NULL entries make smaller sizes decode as unallocated.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
349
/* UDIV (predicated).  Only 32-bit and 64-bit element sizes exist;
 * the NULL entries make smaller sizes decode as unallocated.
 */
static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
357
358 #undef DO_ZPZZ
359
360 /*
361 *** SVE Integer Arithmetic - Unary Predicated Group
362 */
363
364 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
365 {
366 if (fn == NULL) {
367 return false;
368 }
369 if (sve_access_check(s)) {
370 unsigned vsz = vec_full_reg_size(s);
371 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
372 vec_full_reg_offset(s, a->rn),
373 pred_full_reg_offset(s, a->pg),
374 vsz, vsz, 0, fn);
375 }
376 return true;
377 }
378
/* Define trans_<NAME>, expanding a predicated one-source operation
 * via the out-of-line helper chosen by the element size.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
396
/* FABS (predicated).  No byte-sized form: the NULL entry makes
 * esz == 0 decode as unallocated via do_zpz_ool.
 */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
407
/* FNEG (predicated).  No byte-sized form: the NULL entry makes
 * esz == 0 decode as unallocated via do_zpz_ool.
 */
static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
418
/* SXTB (predicated): sign-extend bytes.  esz == 0 is unallocated
 * (NULL entry), since extending a byte into a byte is meaningless.
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
429
/* UXTB (predicated): zero-extend bytes.  esz == 0 is unallocated
 * (NULL entry).
 */
static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
440
/* SXTH (predicated): sign-extend halfwords; only word and doubleword
 * element sizes are allocated.
 */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
450
/* UXTH (predicated): zero-extend halfwords; only word and doubleword
 * element sizes are allocated.
 */
static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
460
/* SXTW (predicated): sign-extend words; only the doubleword form exists.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}
465
/* UXTW (predicated): zero-extend words; only the doubleword form exists.  */
static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}
470
471 #undef DO_ZPZ
472
473 /*
474 *** SVE Integer Reduction Group
475 */
476
477 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
478 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
479 gen_helper_gvec_reduc *fn)
480 {
481 unsigned vsz = vec_full_reg_size(s);
482 TCGv_ptr t_zn, t_pg;
483 TCGv_i32 desc;
484 TCGv_i64 temp;
485
486 if (fn == NULL) {
487 return false;
488 }
489 if (!sve_access_check(s)) {
490 return true;
491 }
492
493 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
494 temp = tcg_temp_new_i64();
495 t_zn = tcg_temp_new_ptr();
496 t_pg = tcg_temp_new_ptr();
497
498 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
499 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
500 fn(temp, t_zn, t_pg, desc);
501 tcg_temp_free_ptr(t_zn);
502 tcg_temp_free_ptr(t_pg);
503 tcg_temp_free_i32(desc);
504
505 write_fp_dreg(s, a->rd, temp);
506 tcg_temp_free_i64(temp);
507 return true;
508 }
509
/* Define trans_<NAME>, a predicated reduction of Zn to a scalar in Vd,
 * with the helper chosen by element size.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

/* Bitwise reductions.  */
DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

/* Arithmetic reductions.  */
DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)
529
/* SADDV: signed add reduction.  The doubleword entry is NULL, so
 * esz == 3 decodes as unallocated via do_vpz_ool.
 */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
538
539 #undef DO_VPZ
540
541 /*
542 *** SVE Shift by Immediate - Predicated Group
543 */
544
545 /* Store zero into every active element of Zd. We will use this for two
546 * and three-operand predicated instructions for which logic dictates a
547 * zero result.
548 */
549 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
550 {
551 static gen_helper_gvec_2 * const fns[4] = {
552 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
553 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
554 };
555 if (sve_access_check(s)) {
556 unsigned vsz = vec_full_reg_size(s);
557 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
558 pred_full_reg_offset(s, pg),
559 vsz, vsz, 0, fns[esz]);
560 }
561 return true;
562 }
563
564 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
565 gen_helper_gvec_3 *fn)
566 {
567 if (sve_access_check(s)) {
568 unsigned vsz = vec_full_reg_size(s);
569 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
570 vec_full_reg_offset(s, a->rn),
571 pred_full_reg_offset(s, a->pg),
572 vsz, vsz, a->imm, fn);
573 }
574 return true;
575 }
576
/* ASR (immediate, predicated).  */
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
592
593 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
594 {
595 static gen_helper_gvec_3 * const fns[4] = {
596 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
597 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
598 };
599 if (a->esz < 0) {
600 return false;
601 }
602 /* Shift by element size is architecturally valid.
603 For logical shifts, it is a zeroing operation. */
604 if (a->imm >= (8 << a->esz)) {
605 return do_clr_zp(s, a->rd, a->pg, a->esz);
606 } else {
607 return do_zpzi_ool(s, a, fns[a->esz]);
608 }
609 }
610
611 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
612 {
613 static gen_helper_gvec_3 * const fns[4] = {
614 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
615 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
616 };
617 if (a->esz < 0) {
618 return false;
619 }
620 /* Shift by element size is architecturally valid.
621 For logical shifts, it is a zeroing operation. */
622 if (a->imm >= (8 << a->esz)) {
623 return do_clr_zp(s, a->rd, a->pg, a->esz);
624 } else {
625 return do_zpzi_ool(s, a, fns[a->esz]);
626 }
627 }
628
629 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
630 {
631 static gen_helper_gvec_3 * const fns[4] = {
632 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
633 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
634 };
635 if (a->esz < 0) {
636 return false;
637 }
638 /* Shift by element size is architecturally valid. For arithmetic
639 right shift for division, it is a zeroing operation. */
640 if (a->imm >= (8 << a->esz)) {
641 return do_clr_zp(s, a->rd, a->pg, a->esz);
642 } else {
643 return do_zpzi_ool(s, a, fns[a->esz]);
644 }
645 }
646
647 /*
648 *** SVE Bitwise Shift - Predicated Group
649 */
650
/* Define trans_<NAME>_zpzw: predicated shift by a wide (64-bit)
 * element vector.  Only byte/half/word element sizes exist; the
 * esz guard keeps fns[] from being indexed out of range.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)
668
669 #undef DO_ZPZW
670
671 /*
672 *** SVE Integer Multiply-Add Group
673 */
674
675 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
676 gen_helper_gvec_5 *fn)
677 {
678 if (sve_access_check(s)) {
679 unsigned vsz = vec_full_reg_size(s);
680 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
681 vec_full_reg_offset(s, a->ra),
682 vec_full_reg_offset(s, a->rn),
683 vec_full_reg_offset(s, a->rm),
684 pred_full_reg_offset(s, a->pg),
685 vsz, vsz, 0, fn);
686 }
687 return true;
688 }
689
/* Define trans_<NAME>, a predicated multiply-accumulate expansion with
 * the helper chosen by element size.
 */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)
702
703 #undef DO_ZPZZZ
704
705 /*
706 *** SVE Predicate Logical Operations Group
707 */
708
/* Expand a setflags (".s") predicate logical operation: perform the
 * operation and then set NZCV from PredTest of the result against
 * the governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        /* Single predicate word: expand inline via the i64 generator.  */
        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
759
/* pd = pn & pm & pg, one 64-bit predicate word at a time.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
765
/* pd = pn & pm & pg, host-vector variant.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
772
/* AND (predicates), with simplifications for repeated operands.  */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        /* Pn & Pn & Pg == Pn & Pg.  */
        if (a->pg == a->rn) {
            /* ... and with Pg == Pn as well, the result is just Pn.  */
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        /* If Pg equals either source, the extra & with Pg is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
795
/* pd = (pn & ~pm) & pg, one 64-bit predicate word at a time.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
801
/* pd = (pn & ~pm) & pg, host-vector variant.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
808
/* BIC (predicates).  */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        /* (Pn & ~Pm) & Pn == Pn & ~Pm, so the & with Pg is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
825
/* pd = (pn ^ pm) & pg, one 64-bit predicate word at a time.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
831
/* pd = (pn ^ pm) & pg, host-vector variant.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
838
/* EOR (predicates).  */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
853
/* pd = (pn & pg) | (pm & ~pg).  Note pn and pm are clobbered as scratch.  */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}
860
/* pd = (pn & pg) | (pm & ~pg), host-vector variant; clobbers pn and pm.  */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}
868
/* SEL (predicates).  */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* There is no setflags form of SEL; treat as unallocated.  */
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
883
/* pd = (pn | pm) & pg, one 64-bit predicate word at a time.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
889
/* pd = (pn | pm) & pg, host-vector variant.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
896
/* ORR (predicates).  */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        /* (Pn | Pn) & Pn == Pn: a straight move.  */
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
913
/* pd = (pn | ~pm) & pg, one 64-bit predicate word at a time.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
919
/* pd = (pn | ~pm) & pg, host-vector variant.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
926
/* ORN (predicates).  */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
941
/* pd = ~(pn | pm) & pg, one 64-bit predicate word at a time.  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
947
/* pd = ~(pn | pm) & pg, host-vector variant.  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
954
/* NOR (predicates).  */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
969
/* pd = ~(pn & pm) & pg, one 64-bit predicate word at a time.  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
975
/* pd = ~(pn & pm) & pg, host-vector variant.  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
982
/* NAND (predicates).  */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Setflags form must also compute PredTest.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
997
998 /*
999 *** SVE Predicate Misc Group
1000 */
1001
/* PTEST: set NZCV from PredTest(Pn, Pg); no register result.  */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single 64-bit predicate word: test it inline.  */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            /* Multi-word predicate: use the out-of-line helper.  */
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1025
/* See the ARM pseudocode DecodePredCount: map a count pattern to the
 * number of active elements, or 0 if the bound cannot be met.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x1f) {          /* ALL */
        return elements;
    }
    if (pattern == 0x1e) {          /* MUL3 */
        return elements - elements % 3;
    }
    if (pattern == 0x1d) {          /* MUL4 */
        return elements - elements % 4;
    }
    if (pattern == 0x0) {           /* POW2 */
        /* Smear the bits below the MSB, then isolate the MSB:
         * the largest power of two not exceeding elements.
         */
        unsigned v = elements;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return v - (v >> 1);
    }
    if (pattern >= 0x1 && pattern <= 0x8) {
        bound = pattern;            /* VL1 ... VL8 */
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        bound = 16u << (pattern - 9);   /* VL16 ... VL256 */
    } else {
        return 0;                   /* #uimm5 */
    }
    return elements >= bound ? bound : 0;
}
1063
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        /* When the active region ends mid-word, mask the final word
         * down to the bits actually set.
         */
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is a single word: one store suffices.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        /* Uniform words: use the gvec dup expansion when the set
         * region lines up with a gvec-sized operation.
         */
        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    /* Fall back to storing word by word: the uniform words, then the
     * partial final word, then zeros out to the register size.
     */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* The flags are compile-time constants here: word != 0 iff
         * at least one element is set.
         */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1149
1150 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1151 {
1152 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1153 }
1154
1155 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1156 {
1157 /* Note pat == 31 is #all, to set all elements. */
1158 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1159 }
1160
1161 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1162 {
1163 /* Note pat == 32 is #unimp, to set no elements. */
1164 return do_predset(s, 0, a->rd, 32, false);
1165 }
1166
1167 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1168 {
1169 /* The path through do_pppp_flags is complicated enough to want to avoid
1170 * duplication. Frob the arguments into the form of a predicated AND.
1171 */
1172 arg_rprr_s alt_a = {
1173 .rd = a->rd, .pg = a->pg, .s = a->s,
1174 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1175 };
1176 return trans_AND_pppp(s, &alt_a, insn);
1177 }
1178
1179 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1180 {
1181 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1182 }
1183
1184 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1185 {
1186 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1187 }
1188
1189 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1190 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1191 TCGv_ptr, TCGv_i32))
1192 {
1193 if (!sve_access_check(s)) {
1194 return true;
1195 }
1196
1197 TCGv_ptr t_pd = tcg_temp_new_ptr();
1198 TCGv_ptr t_pg = tcg_temp_new_ptr();
1199 TCGv_i32 t;
1200 unsigned desc;
1201
1202 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1203 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1204
1205 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1206 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1207 t = tcg_const_i32(desc);
1208
1209 gen_fn(t, t_pd, t_pg, t);
1210 tcg_temp_free_ptr(t_pd);
1211 tcg_temp_free_ptr(t_pg);
1212
1213 do_pred_flags(t);
1214 tcg_temp_free_i32(t);
1215 return true;
1216 }
1217
1218 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1219 {
1220 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1221 }
1222
1223 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1224 {
1225 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1226 }
1227
1228 /*
1229 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
1230 */
1231
1232 /* Subroutine loading a vector register at VOFS of LEN bytes.
1233 * The load should begin at the address Rn + IMM.
1234 */
1235
/* Generate code to load LEN bytes from guest address reg[RN] + IMM
 * into the cpu_env vector/predicate storage at offset VOFS.
 * LEN may be any multiple of 2 (predicate registers); vector loads
 * are a multiple of 8.  Emits either an unrolled sequence or a
 * generated loop, plus a tail for the sub-8-byte remainder.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Number of loads needed: full 8-byte units plus the tail pieces
     * (ctpop8 counts the set bits of len_remain, i.e. the 4/2-byte
     * loads required for the remainder).
     */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: unroll the 8-byte loads inline.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Emit a load loop at translation time; I is the byte offset,
         * kept in a local temp so it survives across the branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        /* Store into cpu_env at VOFS + I, then advance the counter.  */
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 maps 2/4/8 to MO_16/MO_32/MO_64 respectively.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* No 6-byte memory op: compose a 4-byte + 2-byte load.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
1320
1321 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
1322 {
1323 if (sve_access_check(s)) {
1324 int size = vec_full_reg_size(s);
1325 int off = vec_full_reg_offset(s, a->rd);
1326 do_ldr(s, off, size, a->rn, a->imm * size);
1327 }
1328 return true;
1329 }
1330
1331 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
1332 {
1333 if (sve_access_check(s)) {
1334 int size = pred_full_reg_size(s);
1335 int off = pred_full_reg_offset(s, a->rd);
1336 do_ldr(s, off, size, a->rn, a->imm * size);
1337 }
1338 return true;
1339 }