/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note that for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

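/*
 * Illustrative example: with a 256-bit vector length, s->sve_len is
 * 32 bytes, so each predicate register is 32 / 8 = 4 bytes -- one
 * predicate bit for each byte of the vector.
 */
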
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure. Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

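/*
 * Thus (illustrative): a 4-byte predicate is widened to the 8-byte
 * minimum, while an 18-byte predicate (VL = 1152 bits) rounds up to
 * the next 16-byte multiple, 32 bytes.
 */
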
/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

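/*
 * Explanatory note: the ARM PredTest pseudofunction yields
 * N = "first active element is set", Z = "no active element is set",
 * C = "last active element is clear", V = 0. QEMU stores N in bit 31
 * of cpu_NF, Z as "cpu_ZF == 0", and C in bit 0 of cpu_CF, so
 * do_pred_flags above expects the helper result packed to match:
 * bit 31 is N, bit 1 is the inverse of Z, and bit 0 is C.
 */
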
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* Note that t carries the word count in and the flags result out. */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

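/*
 * E.g. for esz == 1 (16-bit elements) every second predicate bit is
 * significant, hence the 0x5555... mask; for esz == 3 (64-bit elements)
 * only one bit in eight is, hence 0x0101...
 */
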
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large. The computation
         * of the flags depends on the original contents of the guarding
         * predicate. If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

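/*
 * Predicate AND also serves as the predicated move: with Pn == Pm and
 * Pg == Pn the result is a plain copy, and when the guard equals one
 * of the operands the extra AND with the guard is redundant. The
 * translator below lowers those degenerate forms to cheaper two- or
 * three-operand gvec operations.
 */
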
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

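/*
 * SEL has no flag-setting variant; the encoding with the S bit set is
 * unallocated, so trans_SEL_pppp below rejects it.
 */
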
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

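/*
 * Worked example (illustrative): with a 256-bit vector, fullsz is 32
 * bytes, so esz == 2 (words) gives 8 elements. POW2 returns 8, VL7
 * returns 7 (since 8 >= 7), MUL3 returns 6, and VL16 returns 0
 * because the bound exceeds the element count.
 */
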
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0. For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

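/*
 * Note on the PTRUES flags set in do_predset above: the result is
 * known at translate time, so the PredTest outcome folds to constants.
 * If any element was set (word != 0), the first and last active
 * elements are set, giving N = 1, Z = 0, C = 0; otherwise N = 0,
 * Z = 1, C = 1. V is always 0.
 */
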
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication. Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

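/*
 * For PFIRST and PNEXT below, the descriptor passed to the helper is
 * packed by hand rather than via simd_desc(): the low bits carry the
 * predicate size in 64-bit words, and two bits at SIMD_DATA_SHIFT
 * carry the element size.
 */
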
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities. There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration. Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

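/*
 * Illustrative example: a 6-byte predicate (VL = 384 bits) has
 * len_align == 0 and len_remain == 6, so it is loaded as one 4-byte
 * and one 2-byte little-endian access, merged with a deposit into a
 * single 64-bit store.
 */
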
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}