/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
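
/* A worked example of the encoding above: for x = 0b001010 (10),
 * x >> 3 == 1, so esz = 0 (byte elements); the right-shift amount is
 * (16 << 0) - 10 = 6 and the left-shift amount is 10 - 8 = 2.  When
 * the tsz bits are all zero, 31 - clz32(0) = -1, the unallocated
 * encoding mentioned above.
 */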

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
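
/* E.g. for a 256-bit vector length, sve_len is 32 bytes, giving a
 * 4-byte predicate register: one predicate bit per vector byte.
 */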

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
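
/* E.g. a 2-byte predicate is padded to the 8-byte gvec minimum, and a
 * 17-byte predicate rounds up to QEMU_ALIGN_UP(17, 16) = 32 bytes.
 */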

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
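
/* This mapping relies on QEMU's flag representation: cpu_NF takes its
 * N from bit 31 of t, cpu_ZF indicates Z set only when it is zero
 * (hence the mask with 2), cpu_CF is the carry bit itself, and
 * PredTest always leaves V clear.
 */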

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
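
/* E.g. halfword elements (esz == 1) are governed by every second
 * predicate bit, hence the 0x5555...5555 mask; doubleword elements
 * use only one bit in eight, hence 0x0101...0101.
 */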

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

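/* E.g. DO_ZPZZ(AND, and) expands to trans_AND_zpzz, dispatching on the
 * element size to gen_helper_sve_and_zpzz_{b,h,s,d}; the governing
 * predicate travels to the helper as a fourth vector operand.
 */
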
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

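/* INDEX fills Zd with the arithmetic sequence start, start + incr,
 * start + 2 * incr, ..., truncated to the element size; e.g. a start
 * of 1 and an increment of 2 yields 1, 3, 5, ...
 */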
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

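/* FTSSEL (trig select coefficient): roughly, bit 0 of each Zm element
 * chooses between the corresponding Zn element and 1.0, and bit 1
 * flips the sign of the result; the exact rules live in the
 * out-of-line helpers.
 */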
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
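
/* E.g. with fullsz = 32 bytes and esz = 2 there are 8 word elements:
 * POW2 gives 8, VL7 gives 7, VL16 gives 0 (the bound exceeds the
 * vector), MUL3 gives 6, and ALL gives 8.
 */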

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
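    /* These constants mirror PredTest on the value just stored: when
     * any element is set (word != 0), the first and last active
     * elements are true, so N is set and Z and C are clear; when no
     * element is set, N is clear and Z and C are set.
     */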
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
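    /* E.g. a 16-byte load unrolls as two 8-byte loads (nparts == 2),
     * while a 6-byte predicate load has len_align == 0 and
     * len_remain == 6, handled entirely by the tail below as a 4-byte
     * plus a 2-byte load.
     */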
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}