1 /*
2 * ARM translation: M-profile MVE instructions
3 *
4 * Copyright (c) 2021 Linaro, Ltd.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "translate.h"
25 #include "translate-a32.h"
26
27 static inline int vidup_imm(DisasContext *s, int x)
28 {
29 return 1 << x;
30 }
31
32 /* Include the generated decoder */
33 #include "decode-mve.c.inc"
34
35 typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
36 typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
37 typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
38 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
39 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
40 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
41 typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
42 typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
43 typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
44 typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
45 typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
46 typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
47 typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
48 typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
49 typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
50 typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
51 typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
52
53 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
54 static inline long mve_qreg_offset(unsigned reg)
55 {
56 return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
57 }
58
59 static TCGv_ptr mve_qreg_ptr(unsigned reg)
60 {
61 TCGv_ptr ret = tcg_temp_new_ptr();
62 tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
63 return ret;
64 }
65
66 static bool mve_no_predication(DisasContext *s)
67 {
68 /*
69 * Return true if we are executing the entire MVE instruction
70 * with no predication or partial-execution, and so we can safely
71 * use an inline TCG vector implementation.
72 */
73 return s->eci == 0 && s->mve_no_pred;
74 }
75
76 static bool mve_check_qreg_bank(DisasContext *s, int qmask)
77 {
78 /*
79 * Check whether Qregs are in range. For v8.1M only Q0..Q7
80 * are supported, see VFPSmallRegisterBank().
81 */
82 return qmask < 8;
83 }
84
85 bool mve_eci_check(DisasContext *s)
86 {
87 /*
88 * This is a beatwise insn: check that ECI is valid (not a
89 * reserved value) and note that we are handling it.
90 * Return true if OK, false if we generated an exception.
91 */
92 s->eci_handled = true;
93 switch (s->eci) {
94 case ECI_NONE:
95 case ECI_A0:
96 case ECI_A0A1:
97 case ECI_A0A1A2:
98 case ECI_A0A1A2B0:
99 return true;
100 default:
101 /* Reserved value: INVSTATE UsageFault */
102 gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
103 return false;
104 }
105 }
106
107 void mve_update_eci(DisasContext *s)
108 {
109 /*
110 * The helper function will always update the CPUState field,
111 * so we only need to update the DisasContext field.
112 */
113 if (s->eci) {
114 s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
115 }
116 }
117
118 void mve_update_and_store_eci(DisasContext *s)
119 {
120 /*
121 * For insns which don't call a helper function that will call
122 * mve_advance_vpt(), this version updates s->eci and also stores
123 * it out to the CPUState field.
124 */
125 if (s->eci) {
126 mve_update_eci(s);
127 store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
128 }
129 }
130
131 static bool mve_skip_first_beat(DisasContext *s)
132 {
133 /* Return true if PSR.ECI says we must skip the first beat of this insn */
134 switch (s->eci) {
135 case ECI_NONE:
136 return false;
137 case ECI_A0:
138 case ECI_A0A1:
139 case ECI_A0A1A2:
140 case ECI_A0A1A2B0:
141 return true;
142 default:
143 g_assert_not_reached();
144 }
145 }
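/*
 * Worked example of the ECI handling above (a sketch): suppose an MVE
 * insn was interrupted after completing beats 0-2 of itself and beat 0
 * of the following insn, so PSR.ECI holds ECI_A0A1A2B0. On resumption
 * mve_eci_check() accepts that value, the helpers execute only the
 * remaining beat, and mve_update_eci() leaves ECI_A0 pending for the
 * *next* insn (whose beat 0 has already run); every other non-zero ECI
 * value collapses to ECI_NONE once the current insn completes.
 */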
146
147 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
148 unsigned msize)
149 {
150 TCGv_i32 addr;
151 uint32_t offset;
152 TCGv_ptr qreg;
153
154 if (!dc_isar_feature(aa32_mve, s) ||
155 !mve_check_qreg_bank(s, a->qd) ||
156 !fn) {
157 return false;
158 }
159
160 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
161 if (a->rn == 15 || (a->rn == 13 && a->w)) {
162 return false;
163 }
164
165 if (!mve_eci_check(s) || !vfp_access_check(s)) {
166 return true;
167 }
168
169 offset = a->imm << msize;
170 if (!a->a) {
171 offset = -offset;
172 }
173 addr = load_reg(s, a->rn);
174 if (a->p) {
175 tcg_gen_addi_i32(addr, addr, offset);
176 }
177
178 qreg = mve_qreg_ptr(a->qd);
179 fn(cpu_env, qreg, addr);
180
181 /*
182 * Writeback always happens after the last beat of the insn,
183 * regardless of predication
184 */
185 if (a->w) {
186 if (!a->p) {
187 tcg_gen_addi_i32(addr, addr, offset);
188 }
189 store_reg(s, a->rn, addr);
190 }
191 mve_update_eci(s);
192 return true;
193 }
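/*
 * Addressing-mode sketch for do_ldst() above: the immediate is scaled by
 * the in-memory element size (offset = imm << msize), the A bit selects
 * add vs subtract, P selects pre-indexing and W requests writeback of the
 * updated address after the final beat. For example a VLDRH with imm 4,
 * A=1, P=1, W=1 accesses [Rn + 8] and then writes Rn + 8 back to Rn.
 */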
194
195 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
196 {
197 static MVEGenLdStFn * const ldstfns[4][2] = {
198 { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
199 { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
200 { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
201 { NULL, NULL }
202 };
203 return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
204 }
205
206 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \
207 static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
208 { \
209 static MVEGenLdStFn * const ldstfns[2][2] = { \
210 { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
211 { NULL, gen_helper_mve_##ULD }, \
212 }; \
213 return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \
214 }
215
216 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
217 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
218 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
219
220 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
221 {
222 TCGv_i32 addr;
223 TCGv_ptr qd, qm;
224
225 if (!dc_isar_feature(aa32_mve, s) ||
226 !mve_check_qreg_bank(s, a->qd | a->qm) ||
227 !fn || a->rn == 15) {
228 /* Rn case is UNPREDICTABLE */
229 return false;
230 }
231
232 if (!mve_eci_check(s) || !vfp_access_check(s)) {
233 return true;
234 }
235
236 addr = load_reg(s, a->rn);
237
238 qd = mve_qreg_ptr(a->qd);
239 qm = mve_qreg_ptr(a->qm);
240 fn(cpu_env, qd, qm, addr);
241 mve_update_eci(s);
242 return true;
243 }
244
245 /*
246 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
247 * sign-extended to halfword elements in register". _os_ indicates that
248 * the offsets in Qm should be scaled by the element size.
249 */
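/*
 * For example, gen_helper_mve_vldrh_sg_os_sw is the gather load of
 * halfwords from memory, sign-extended to word elements in the register,
 * with the offsets in Qm scaled by the element size; vldrh_sg_sw is the
 * same load with unscaled offsets.
 */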
250 /* This macro is just to make the arrays more compact in these functions */
251 #define F(N) gen_helper_mve_##N
252
253 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
254 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
255 {
256 static MVEGenLdStSGFn * const fns[2][4][4] = { {
257 { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
258 { NULL, NULL, F(vldrh_sg_sw), NULL },
259 { NULL, NULL, NULL, NULL },
260 { NULL, NULL, NULL, NULL }
261 }, {
262 { NULL, NULL, NULL, NULL },
263 { NULL, NULL, F(vldrh_sg_os_sw), NULL },
264 { NULL, NULL, NULL, NULL },
265 { NULL, NULL, NULL, NULL }
266 }
267 };
268 if (a->qd == a->qm) {
269 return false; /* UNPREDICTABLE */
270 }
271 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
272 }
273
274 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
275 {
276 static MVEGenLdStSGFn * const fns[2][4][4] = { {
277 { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
278 { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
279 { NULL, NULL, F(vldrw_sg_uw), NULL },
280 { NULL, NULL, NULL, F(vldrd_sg_ud) }
281 }, {
282 { NULL, NULL, NULL, NULL },
283 { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
284 { NULL, NULL, F(vldrw_sg_os_uw), NULL },
285 { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
286 }
287 };
288 if (a->qd == a->qm) {
289 return false; /* UNPREDICTABLE */
290 }
291 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
292 }
293
294 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
295 {
296 static MVEGenLdStSGFn * const fns[2][4][4] = { {
297 { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
298 { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
299 { NULL, NULL, F(vstrw_sg_uw), NULL },
300 { NULL, NULL, NULL, F(vstrd_sg_ud) }
301 }, {
302 { NULL, NULL, NULL, NULL },
303 { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
304 { NULL, NULL, F(vstrw_sg_os_uw), NULL },
305 { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
306 }
307 };
308 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
309 }
310
311 #undef F
312
313 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
314 MVEGenLdStSGFn *fn, unsigned msize)
315 {
316 uint32_t offset;
317 TCGv_ptr qd, qm;
318
319 if (!dc_isar_feature(aa32_mve, s) ||
320 !mve_check_qreg_bank(s, a->qd | a->qm) ||
321 !fn) {
322 return false;
323 }
324
325 if (!mve_eci_check(s) || !vfp_access_check(s)) {
326 return true;
327 }
328
329 offset = a->imm << msize;
330 if (!a->a) {
331 offset = -offset;
332 }
333
334 qd = mve_qreg_ptr(a->qd);
335 qm = mve_qreg_ptr(a->qm);
336 fn(cpu_env, qd, qm, tcg_constant_i32(offset));
337 mve_update_eci(s);
338 return true;
339 }
340
341 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
342 {
343 static MVEGenLdStSGFn * const fns[] = {
344 gen_helper_mve_vldrw_sg_uw,
345 gen_helper_mve_vldrw_sg_wb_uw,
346 };
347 if (a->qd == a->qm) {
348 return false; /* UNPREDICTABLE */
349 }
350 return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
351 }
352
353 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
354 {
355 static MVEGenLdStSGFn * const fns[] = {
356 gen_helper_mve_vldrd_sg_ud,
357 gen_helper_mve_vldrd_sg_wb_ud,
358 };
359 if (a->qd == a->qm) {
360 return false; /* UNPREDICTABLE */
361 }
362 return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
363 }
364
365 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
366 {
367 static MVEGenLdStSGFn * const fns[] = {
368 gen_helper_mve_vstrw_sg_uw,
369 gen_helper_mve_vstrw_sg_wb_uw,
370 };
371 return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
372 }
373
374 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
375 {
376 static MVEGenLdStSGFn * const fns[] = {
377 gen_helper_mve_vstrd_sg_ud,
378 gen_helper_mve_vstrd_sg_wb_ud,
379 };
380 return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
381 }
382
383 static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
384 int addrinc)
385 {
386 TCGv_i32 rn;
387
388 if (!dc_isar_feature(aa32_mve, s) ||
389 !mve_check_qreg_bank(s, a->qd) ||
390 !fn || (a->rn == 13 && a->w) || a->rn == 15) {
391 /* Variously UNPREDICTABLE or UNDEF or related-encoding */
392 return false;
393 }
394 if (!mve_eci_check(s) || !vfp_access_check(s)) {
395 return true;
396 }
397
398 rn = load_reg(s, a->rn);
399 /*
400 * We pass the index of Qd, not a pointer, because the helper must
401 * access multiple Q registers starting at Qd and working up.
402 */
403 fn(cpu_env, tcg_constant_i32(a->qd), rn);
404
405 if (a->w) {
406 tcg_gen_addi_i32(rn, rn, addrinc);
407 store_reg(s, a->rn, rn);
408 }
409 mve_update_and_store_eci(s);
410 return true;
411 }
412
413 /* This macro is just to make the arrays more compact in these functions */
414 #define F(N) gen_helper_mve_##N
415
416 static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
417 {
418 static MVEGenLdStIlFn * const fns[4][4] = {
419 { F(vld20b), F(vld20h), F(vld20w), NULL, },
420 { F(vld21b), F(vld21h), F(vld21w), NULL, },
421 { NULL, NULL, NULL, NULL },
422 { NULL, NULL, NULL, NULL },
423 };
424 if (a->qd > 6) {
425 return false;
426 }
427 return do_vldst_il(s, a, fns[a->pat][a->size], 32);
428 }
429
430 static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
431 {
432 static MVEGenLdStIlFn * const fns[4][4] = {
433 { F(vld40b), F(vld40h), F(vld40w), NULL, },
434 { F(vld41b), F(vld41h), F(vld41w), NULL, },
435 { F(vld42b), F(vld42h), F(vld42w), NULL, },
436 { F(vld43b), F(vld43h), F(vld43w), NULL, },
437 };
438 if (a->qd > 4) {
439 return false;
440 }
441 return do_vldst_il(s, a, fns[a->pat][a->size], 64);
442 }
443
444 static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
445 {
446 static MVEGenLdStIlFn * const fns[4][4] = {
447 { F(vst20b), F(vst20h), F(vst20w), NULL, },
448 { F(vst21b), F(vst21h), F(vst21w), NULL, },
449 { NULL, NULL, NULL, NULL },
450 { NULL, NULL, NULL, NULL },
451 };
452 if (a->qd > 6) {
453 return false;
454 }
455 return do_vldst_il(s, a, fns[a->pat][a->size], 32);
456 }
457
458 static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
459 {
460 static MVEGenLdStIlFn * const fns[4][4] = {
461 { F(vst40b), F(vst40h), F(vst40w), NULL, },
462 { F(vst41b), F(vst41h), F(vst41w), NULL, },
463 { F(vst42b), F(vst42h), F(vst42w), NULL, },
464 { F(vst43b), F(vst43h), F(vst43w), NULL, },
465 };
466 if (a->qd > 4) {
467 return false;
468 }
469 return do_vldst_il(s, a, fns[a->pat][a->size], 64);
470 }
471
472 #undef F
473
474 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
475 {
476 TCGv_ptr qd;
477 TCGv_i32 rt;
478
479 if (!dc_isar_feature(aa32_mve, s) ||
480 !mve_check_qreg_bank(s, a->qd)) {
481 return false;
482 }
483 if (a->rt == 13 || a->rt == 15) {
484 /* UNPREDICTABLE; we choose to UNDEF */
485 return false;
486 }
487 if (!mve_eci_check(s) || !vfp_access_check(s)) {
488 return true;
489 }
490
491 rt = load_reg(s, a->rt);
492 if (mve_no_predication(s)) {
493 tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
494 } else {
495 qd = mve_qreg_ptr(a->qd);
496 tcg_gen_dup_i32(a->size, rt, rt);
497 gen_helper_mve_vdup(cpu_env, qd, rt);
498 }
499 mve_update_eci(s);
500 return true;
501 }
502
503 static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
504 GVecGen2Fn vecfn)
505 {
506 TCGv_ptr qd, qm;
507
508 if (!dc_isar_feature(aa32_mve, s) ||
509 !mve_check_qreg_bank(s, a->qd | a->qm) ||
510 !fn) {
511 return false;
512 }
513
514 if (!mve_eci_check(s) || !vfp_access_check(s)) {
515 return true;
516 }
517
518 if (vecfn && mve_no_predication(s)) {
519 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
520 } else {
521 qd = mve_qreg_ptr(a->qd);
522 qm = mve_qreg_ptr(a->qm);
523 fn(cpu_env, qd, qm);
524 }
525 mve_update_eci(s);
526 return true;
527 }
528
529 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
530 {
531 return do_1op_vec(s, a, fn, NULL);
532 }
533
534 #define DO_1OP_VEC(INSN, FN, VECFN) \
535 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
536 { \
537 static MVEGenOneOpFn * const fns[] = { \
538 gen_helper_mve_##FN##b, \
539 gen_helper_mve_##FN##h, \
540 gen_helper_mve_##FN##w, \
541 NULL, \
542 }; \
543 return do_1op_vec(s, a, fns[a->size], VECFN); \
544 }
545
546 #define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
547
548 DO_1OP(VCLZ, vclz)
549 DO_1OP(VCLS, vcls)
550 DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
551 DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
552 DO_1OP(VQABS, vqabs)
553 DO_1OP(VQNEG, vqneg)
554 DO_1OP(VMAXA, vmaxa)
555 DO_1OP(VMINA, vmina)
556
557 /*
558 * For simple float/int conversions we use the fixed-point
559 * conversion helpers with a zero shift count
560 */
561 #define DO_VCVT(INSN, HFN, SFN) \
562 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
563 { \
564 gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \
565 } \
566 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
567 { \
568 gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \
569 } \
570 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
571 { \
572 static MVEGenOneOpFn * const fns[] = { \
573 NULL, \
574 gen_##INSN##h, \
575 gen_##INSN##s, \
576 NULL, \
577 }; \
578 if (!dc_isar_feature(aa32_mve_fp, s)) { \
579 return false; \
580 } \
581 return do_1op(s, a, fns[a->size]); \
582 }
583
584 DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
585 DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
586 DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
587 DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
588
589 static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
590 ARMFPRounding rmode, bool u)
591 {
592 /*
593 * Handle VCVT fp to int with specified rounding mode.
594 * This is a 1op fn but we must pass the rounding mode as
595 * an immediate to the helper.
596 */
597 TCGv_ptr qd, qm;
598 static MVEGenVCVTRmodeFn * const fns[4][2] = {
599 { NULL, NULL },
600 { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
601 { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
602 { NULL, NULL },
603 };
604 MVEGenVCVTRmodeFn *fn = fns[a->size][u];
605
606 if (!dc_isar_feature(aa32_mve_fp, s) ||
607 !mve_check_qreg_bank(s, a->qd | a->qm) ||
608 !fn) {
609 return false;
610 }
611
612 if (!mve_eci_check(s) || !vfp_access_check(s)) {
613 return true;
614 }
615
616 qd = mve_qreg_ptr(a->qd);
617 qm = mve_qreg_ptr(a->qm);
618 fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
619 mve_update_eci(s);
620 return true;
621 }
622
623 #define DO_VCVT_RMODE(INSN, RMODE, U) \
624 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
625 { \
626 return do_vcvt_rmode(s, a, RMODE, U); \
627 } \
628
629 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
630 DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
631 DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
632 DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
633 DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
634 DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
635 DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
636 DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
637
638 #define DO_VCVT_SH(INSN, FN) \
639 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
640 { \
641 if (!dc_isar_feature(aa32_mve_fp, s)) { \
642 return false; \
643 } \
644 return do_1op(s, a, gen_helper_mve_##FN); \
645 } \
646
647 DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
648 DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
649 DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
650 DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
651
652 #define DO_VRINT(INSN, RMODE) \
653 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
654 { \
655 gen_helper_mve_vrint_rm_h(env, qd, qm, \
656 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
657 } \
658 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
659 { \
660 gen_helper_mve_vrint_rm_s(env, qd, qm, \
661 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
662 } \
663 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
664 { \
665 static MVEGenOneOpFn * const fns[] = { \
666 NULL, \
667 gen_##INSN##h, \
668 gen_##INSN##s, \
669 NULL, \
670 }; \
671 if (!dc_isar_feature(aa32_mve_fp, s)) { \
672 return false; \
673 } \
674 return do_1op(s, a, fns[a->size]); \
675 }
676
677 DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
678 DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
679 DO_VRINT(VRINTZ, FPROUNDING_ZERO)
680 DO_VRINT(VRINTM, FPROUNDING_NEGINF)
681 DO_VRINT(VRINTP, FPROUNDING_POSINF)
682
683 static bool trans_VRINTX(DisasContext *s, arg_1op *a)
684 {
685 static MVEGenOneOpFn * const fns[] = {
686 NULL,
687 gen_helper_mve_vrintx_h,
688 gen_helper_mve_vrintx_s,
689 NULL,
690 };
691 if (!dc_isar_feature(aa32_mve_fp, s)) {
692 return false;
693 }
694 return do_1op(s, a, fns[a->size]);
695 }
696
697 /* Narrowing moves: only size 0 and 1 are valid */
698 #define DO_VMOVN(INSN, FN) \
699 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
700 { \
701 static MVEGenOneOpFn * const fns[] = { \
702 gen_helper_mve_##FN##b, \
703 gen_helper_mve_##FN##h, \
704 NULL, \
705 NULL, \
706 }; \
707 return do_1op(s, a, fns[a->size]); \
708 }
709
710 DO_VMOVN(VMOVNB, vmovnb)
711 DO_VMOVN(VMOVNT, vmovnt)
712 DO_VMOVN(VQMOVUNB, vqmovunb)
713 DO_VMOVN(VQMOVUNT, vqmovunt)
714 DO_VMOVN(VQMOVN_BS, vqmovnbs)
715 DO_VMOVN(VQMOVN_TS, vqmovnts)
716 DO_VMOVN(VQMOVN_BU, vqmovnbu)
717 DO_VMOVN(VQMOVN_TU, vqmovntu)
718
719 static bool trans_VREV16(DisasContext *s, arg_1op *a)
720 {
721 static MVEGenOneOpFn * const fns[] = {
722 gen_helper_mve_vrev16b,
723 NULL,
724 NULL,
725 NULL,
726 };
727 return do_1op(s, a, fns[a->size]);
728 }
729
730 static bool trans_VREV32(DisasContext *s, arg_1op *a)
731 {
732 static MVEGenOneOpFn * const fns[] = {
733 gen_helper_mve_vrev32b,
734 gen_helper_mve_vrev32h,
735 NULL,
736 NULL,
737 };
738 return do_1op(s, a, fns[a->size]);
739 }
740
741 static bool trans_VREV64(DisasContext *s, arg_1op *a)
742 {
743 static MVEGenOneOpFn * const fns[] = {
744 gen_helper_mve_vrev64b,
745 gen_helper_mve_vrev64h,
746 gen_helper_mve_vrev64w,
747 NULL,
748 };
749 return do_1op(s, a, fns[a->size]);
750 }
751
752 static bool trans_VMVN(DisasContext *s, arg_1op *a)
753 {
754 return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
755 }
756
757 static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
758 {
759 static MVEGenOneOpFn * const fns[] = {
760 NULL,
761 gen_helper_mve_vfabsh,
762 gen_helper_mve_vfabss,
763 NULL,
764 };
765 if (!dc_isar_feature(aa32_mve_fp, s)) {
766 return false;
767 }
768 return do_1op(s, a, fns[a->size]);
769 }
770
771 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
772 {
773 static MVEGenOneOpFn * const fns[] = {
774 NULL,
775 gen_helper_mve_vfnegh,
776 gen_helper_mve_vfnegs,
777 NULL,
778 };
779 if (!dc_isar_feature(aa32_mve_fp, s)) {
780 return false;
781 }
782 return do_1op(s, a, fns[a->size]);
783 }
784
785 static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
786 GVecGen3Fn *vecfn)
787 {
788 TCGv_ptr qd, qn, qm;
789
790 if (!dc_isar_feature(aa32_mve, s) ||
791 !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
792 !fn) {
793 return false;
794 }
795 if (!mve_eci_check(s) || !vfp_access_check(s)) {
796 return true;
797 }
798
799 if (vecfn && mve_no_predication(s)) {
800 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
801 mve_qreg_offset(a->qm), 16, 16);
802 } else {
803 qd = mve_qreg_ptr(a->qd);
804 qn = mve_qreg_ptr(a->qn);
805 qm = mve_qreg_ptr(a->qm);
806 fn(cpu_env, qd, qn, qm);
807 }
808 mve_update_eci(s);
809 return true;
810 }
811
812 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
813 {
814 return do_2op_vec(s, a, fn, NULL);
815 }
816
817 #define DO_LOGIC(INSN, HELPER, VECFN) \
818 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
819 { \
820 return do_2op_vec(s, a, HELPER, VECFN); \
821 }
822
823 DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
824 DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
825 DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
826 DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
827 DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)
828
829 static bool trans_VPSEL(DisasContext *s, arg_2op *a)
830 {
831 /* This insn updates predication bits */
832 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
833 return do_2op(s, a, gen_helper_mve_vpsel);
834 }
835
836 #define DO_2OP_VEC(INSN, FN, VECFN) \
837 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
838 { \
839 static MVEGenTwoOpFn * const fns[] = { \
840 gen_helper_mve_##FN##b, \
841 gen_helper_mve_##FN##h, \
842 gen_helper_mve_##FN##w, \
843 NULL, \
844 }; \
845 return do_2op_vec(s, a, fns[a->size], VECFN); \
846 }
847
848 #define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
849
850 DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
851 DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
852 DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
853 DO_2OP(VMULH_S, vmulhs)
854 DO_2OP(VMULH_U, vmulhu)
855 DO_2OP(VRMULH_S, vrmulhs)
856 DO_2OP(VRMULH_U, vrmulhu)
857 DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
858 DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
859 DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
860 DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
861 DO_2OP(VABD_S, vabds)
862 DO_2OP(VABD_U, vabdu)
863 DO_2OP(VHADD_S, vhadds)
864 DO_2OP(VHADD_U, vhaddu)
865 DO_2OP(VHSUB_S, vhsubs)
866 DO_2OP(VHSUB_U, vhsubu)
867 DO_2OP(VMULL_BS, vmullbs)
868 DO_2OP(VMULL_BU, vmullbu)
869 DO_2OP(VMULL_TS, vmullts)
870 DO_2OP(VMULL_TU, vmulltu)
871 DO_2OP(VQDMULH, vqdmulh)
872 DO_2OP(VQRDMULH, vqrdmulh)
873 DO_2OP(VQADD_S, vqadds)
874 DO_2OP(VQADD_U, vqaddu)
875 DO_2OP(VQSUB_S, vqsubs)
876 DO_2OP(VQSUB_U, vqsubu)
877 DO_2OP(VSHL_S, vshls)
878 DO_2OP(VSHL_U, vshlu)
879 DO_2OP(VRSHL_S, vrshls)
880 DO_2OP(VRSHL_U, vrshlu)
881 DO_2OP(VQSHL_S, vqshls)
882 DO_2OP(VQSHL_U, vqshlu)
883 DO_2OP(VQRSHL_S, vqrshls)
884 DO_2OP(VQRSHL_U, vqrshlu)
885 DO_2OP(VQDMLADH, vqdmladh)
886 DO_2OP(VQDMLADHX, vqdmladhx)
887 DO_2OP(VQRDMLADH, vqrdmladh)
888 DO_2OP(VQRDMLADHX, vqrdmladhx)
889 DO_2OP(VQDMLSDH, vqdmlsdh)
890 DO_2OP(VQDMLSDHX, vqdmlsdhx)
891 DO_2OP(VQRDMLSDH, vqrdmlsdh)
892 DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
893 DO_2OP(VRHADD_S, vrhadds)
894 DO_2OP(VRHADD_U, vrhaddu)
895 /*
896 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
897 * so we can reuse the DO_2OP macro. (Our implementation calculates the
898 * "expected" results in this case.) Similarly for VHCADD.
899 */
900 DO_2OP(VCADD90, vcadd90)
901 DO_2OP(VCADD270, vcadd270)
902 DO_2OP(VHCADD90, vhcadd90)
903 DO_2OP(VHCADD270, vhcadd270)
904
905 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
906 {
907 static MVEGenTwoOpFn * const fns[] = {
908 NULL,
909 gen_helper_mve_vqdmullbh,
910 gen_helper_mve_vqdmullbw,
911 NULL,
912 };
913 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
914 /* UNPREDICTABLE; we choose to undef */
915 return false;
916 }
917 return do_2op(s, a, fns[a->size]);
918 }
919
920 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
921 {
922 static MVEGenTwoOpFn * const fns[] = {
923 NULL,
924 gen_helper_mve_vqdmullth,
925 gen_helper_mve_vqdmulltw,
926 NULL,
927 };
928 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
929 /* UNPREDICTABLE; we choose to undef */
930 return false;
931 }
932 return do_2op(s, a, fns[a->size]);
933 }
934
935 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
936 {
937 /*
938 * Note that a->size indicates the output size, ie VMULL.P8
939 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
940 * is the 16x16->32 operation and a->size is MO_32.
941 */
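/*
 * For example, VMULL.P8 (the 8x8->16 polynomial multiply) decodes with
 * a->size == MO_16 and so selects gen_helper_mve_vmullpbh below, while
 * VMULL.P16 (16x16->32) has a->size == MO_32 and selects the vmullpbw
 * helper.
 */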
942 static MVEGenTwoOpFn * const fns[] = {
943 NULL,
944 gen_helper_mve_vmullpbh,
945 gen_helper_mve_vmullpbw,
946 NULL,
947 };
948 return do_2op(s, a, fns[a->size]);
949 }
950
951 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
952 {
953 /* a->size is as for trans_VMULLP_B */
954 static MVEGenTwoOpFn * const fns[] = {
955 NULL,
956 gen_helper_mve_vmullpth,
957 gen_helper_mve_vmullptw,
958 NULL,
959 };
960 return do_2op(s, a, fns[a->size]);
961 }
962
963 /*
964 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
965 * of the 32-bit elements in each lane of the input vectors, where the
966 * carry-out of each add is the carry-in of the next. The initial carry
967 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
968 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
969 * These insns are subject to beat-wise execution. Partial execution
970 * of an I=1 (initial carry input fixed) insn which does not
971 * execute the first beat must start with the current FPSCR.NZCV
972 * value, not the fixed constant input.
973 */
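/*
 * A small sketch of the carry chain described above, for VADCI: lane 0
 * computes Qn[0] + Qm[0] + 0, its carry-out becomes the carry-in of the
 * lane 1 addition, and so on up the vector; the carry-out of lane 3 is
 * written to FPSCR.C. VADC is the same except that lane 0's carry-in is
 * the incoming FPSCR.C value.
 */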
974 static bool trans_VADC(DisasContext *s, arg_2op *a)
975 {
976 return do_2op(s, a, gen_helper_mve_vadc);
977 }
978
979 static bool trans_VADCI(DisasContext *s, arg_2op *a)
980 {
981 if (mve_skip_first_beat(s)) {
982 return trans_VADC(s, a);
983 }
984 return do_2op(s, a, gen_helper_mve_vadci);
985 }
986
987 static bool trans_VSBC(DisasContext *s, arg_2op *a)
988 {
989 return do_2op(s, a, gen_helper_mve_vsbc);
990 }
991
992 static bool trans_VSBCI(DisasContext *s, arg_2op *a)
993 {
994 if (mve_skip_first_beat(s)) {
995 return trans_VSBC(s, a);
996 }
997 return do_2op(s, a, gen_helper_mve_vsbci);
998 }
999
1000 #define DO_2OP_FP(INSN, FN) \
1001 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
1002 { \
1003 static MVEGenTwoOpFn * const fns[] = { \
1004 NULL, \
1005 gen_helper_mve_##FN##h, \
1006 gen_helper_mve_##FN##s, \
1007 NULL, \
1008 }; \
1009 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1010 return false; \
1011 } \
1012 return do_2op(s, a, fns[a->size]); \
1013 }
1014
1015 DO_2OP_FP(VADD_fp, vfadd)
1016 DO_2OP_FP(VSUB_fp, vfsub)
1017 DO_2OP_FP(VMUL_fp, vfmul)
1018 DO_2OP_FP(VABD_fp, vfabd)
1019 DO_2OP_FP(VMAXNM, vmaxnm)
1020 DO_2OP_FP(VMINNM, vminnm)
1021 DO_2OP_FP(VCADD90_fp, vfcadd90)
1022 DO_2OP_FP(VCADD270_fp, vfcadd270)
1023 DO_2OP_FP(VFMA, vfma)
1024 DO_2OP_FP(VFMS, vfms)
1025 DO_2OP_FP(VCMUL0, vcmul0)
1026 DO_2OP_FP(VCMUL90, vcmul90)
1027 DO_2OP_FP(VCMUL180, vcmul180)
1028 DO_2OP_FP(VCMUL270, vcmul270)
1029 DO_2OP_FP(VCMLA0, vcmla0)
1030 DO_2OP_FP(VCMLA90, vcmla90)
1031 DO_2OP_FP(VCMLA180, vcmla180)
1032 DO_2OP_FP(VCMLA270, vcmla270)
1033 DO_2OP_FP(VMAXNMA, vmaxnma)
1034 DO_2OP_FP(VMINNMA, vminnma)
1035
1036 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
1037 MVEGenTwoOpScalarFn fn)
1038 {
1039 TCGv_ptr qd, qn;
1040 TCGv_i32 rm;
1041
1042 if (!dc_isar_feature(aa32_mve, s) ||
1043 !mve_check_qreg_bank(s, a->qd | a->qn) ||
1044 !fn) {
1045 return false;
1046 }
1047 if (a->rm == 13 || a->rm == 15) {
1048 /* UNPREDICTABLE */
1049 return false;
1050 }
1051 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1052 return true;
1053 }
1054
1055 qd = mve_qreg_ptr(a->qd);
1056 qn = mve_qreg_ptr(a->qn);
1057 rm = load_reg(s, a->rm);
1058 fn(cpu_env, qd, qn, rm);
1059 mve_update_eci(s);
1060 return true;
1061 }
1062
1063 #define DO_2OP_SCALAR(INSN, FN) \
1064 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1065 { \
1066 static MVEGenTwoOpScalarFn * const fns[] = { \
1067 gen_helper_mve_##FN##b, \
1068 gen_helper_mve_##FN##h, \
1069 gen_helper_mve_##FN##w, \
1070 NULL, \
1071 }; \
1072 return do_2op_scalar(s, a, fns[a->size]); \
1073 }
1074
1075 DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
1076 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
1077 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
1078 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
1079 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
1080 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
1081 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
1082 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
1083 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
1084 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
1085 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
1086 DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
1087 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
1088 DO_2OP_SCALAR(VBRSR, vbrsr)
1089 DO_2OP_SCALAR(VMLA, vmla)
1090 DO_2OP_SCALAR(VMLAS, vmlas)
1091 DO_2OP_SCALAR(VQDMLAH, vqdmlah)
1092 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
1093 DO_2OP_SCALAR(VQDMLASH, vqdmlash)
1094 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
1095
1096 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
1097 {
1098 static MVEGenTwoOpScalarFn * const fns[] = {
1099 NULL,
1100 gen_helper_mve_vqdmullb_scalarh,
1101 gen_helper_mve_vqdmullb_scalarw,
1102 NULL,
1103 };
1104 if (a->qd == a->qn && a->size == MO_32) {
1105 /* UNPREDICTABLE; we choose to undef */
1106 return false;
1107 }
1108 return do_2op_scalar(s, a, fns[a->size]);
1109 }
1110
1111 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
1112 {
1113 static MVEGenTwoOpScalarFn * const fns[] = {
1114 NULL,
1115 gen_helper_mve_vqdmullt_scalarh,
1116 gen_helper_mve_vqdmullt_scalarw,
1117 NULL,
1118 };
1119 if (a->qd == a->qn && a->size == MO_32) {
1120 /* UNPREDICTABLE; we choose to undef */
1121 return false;
1122 }
1123 return do_2op_scalar(s, a, fns[a->size]);
1124 }
1125
1126
1127 #define DO_2OP_FP_SCALAR(INSN, FN) \
1128 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1129 { \
1130 static MVEGenTwoOpScalarFn * const fns[] = { \
1131 NULL, \
1132 gen_helper_mve_##FN##h, \
1133 gen_helper_mve_##FN##s, \
1134 NULL, \
1135 }; \
1136 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1137 return false; \
1138 } \
1139 return do_2op_scalar(s, a, fns[a->size]); \
1140 }
1141
1142 DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
1143 DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
1144 DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
1145 DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
1146 DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
1147
1148 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
1149 MVEGenLongDualAccOpFn *fn)
1150 {
1151 TCGv_ptr qn, qm;
1152 TCGv_i64 rda_i, rda_o;
1153 TCGv_i32 rdalo, rdahi;
1154
1155 if (!dc_isar_feature(aa32_mve, s) ||
1156 !mve_check_qreg_bank(s, a->qn | a->qm) ||
1157 !fn) {
1158 return false;
1159 }
1160 /*
1161 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1162 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1163 */
1164 if (a->rdahi == 13 || a->rdahi == 15) {
1165 return false;
1166 }
1167 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1168 return true;
1169 }
1170
1171 qn = mve_qreg_ptr(a->qn);
1172 qm = mve_qreg_ptr(a->qm);
1173
1174 /*
1175 * This insn is subject to beat-wise execution. Partial execution
1176 * of an A=0 (no-accumulate) insn which does not execute the first
1177 * beat must start with the current rda value, not 0.
1178 */
1179 rda_o = tcg_temp_new_i64();
1180 if (a->a || mve_skip_first_beat(s)) {
1181 rda_i = rda_o;
1182 rdalo = load_reg(s, a->rdalo);
1183 rdahi = load_reg(s, a->rdahi);
1184 tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
1185 } else {
1186 rda_i = tcg_constant_i64(0);
1187 }
1188
1189 fn(rda_o, cpu_env, qn, qm, rda_i);
1190
1191 rdalo = tcg_temp_new_i32();
1192 rdahi = tcg_temp_new_i32();
1193 tcg_gen_extrl_i64_i32(rdalo, rda_o);
1194 tcg_gen_extrh_i64_i32(rdahi, rda_o);
1195 store_reg(s, a->rdalo, rdalo);
1196 store_reg(s, a->rdahi, rdahi);
1197 mve_update_eci(s);
1198 return true;
1199 }
1200
1201 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
1202 {
1203 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1204 { NULL, NULL },
1205 { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
1206 { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
1207 { NULL, NULL },
1208 };
1209 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1210 }
1211
1212 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
1213 {
1214 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1215 { NULL, NULL },
1216 { gen_helper_mve_vmlaldavuh, NULL },
1217 { gen_helper_mve_vmlaldavuw, NULL },
1218 { NULL, NULL },
1219 };
1220 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1221 }
1222
1223 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
1224 {
1225 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1226 { NULL, NULL },
1227 { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
1228 { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
1229 { NULL, NULL },
1230 };
1231 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1232 }
1233
1234 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
1235 {
1236 static MVEGenLongDualAccOpFn * const fns[] = {
1237 gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
1238 };
1239 return do_long_dual_acc(s, a, fns[a->x]);
1240 }
1241
1242 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
1243 {
1244 static MVEGenLongDualAccOpFn * const fns[] = {
1245 gen_helper_mve_vrmlaldavhuw, NULL,
1246 };
1247 return do_long_dual_acc(s, a, fns[a->x]);
1248 }
1249
1250 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
1251 {
1252 static MVEGenLongDualAccOpFn * const fns[] = {
1253 gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
1254 };
1255 return do_long_dual_acc(s, a, fns[a->x]);
1256 }
1257
1258 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
1259 {
1260 TCGv_ptr qn, qm;
1261 TCGv_i32 rda_i, rda_o;
1262
1263 if (!dc_isar_feature(aa32_mve, s) ||
1264 !mve_check_qreg_bank(s, a->qn) ||
1265 !fn) {
1266 return false;
1267 }
1268 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1269 return true;
1270 }
1271
1272 qn = mve_qreg_ptr(a->qn);
1273 qm = mve_qreg_ptr(a->qm);
1274
1275 /*
1276 * This insn is subject to beat-wise execution. Partial execution
1277 * of an A=0 (no-accumulate) insn which does not execute the first
1278 * beat must start with the current rda value, not 0.
1279 */
1280 if (a->a || mve_skip_first_beat(s)) {
1281 rda_o = rda_i = load_reg(s, a->rda);
1282 } else {
1283 rda_i = tcg_constant_i32(0);
1284 rda_o = tcg_temp_new_i32();
1285 }
1286
1287 fn(rda_o, cpu_env, qn, qm, rda_i);
1288 store_reg(s, a->rda, rda_o);
1289
1290 mve_update_eci(s);
1291 return true;
1292 }
1293
1294 #define DO_DUAL_ACC(INSN, FN) \
1295 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \
1296 { \
1297 static MVEGenDualAccOpFn * const fns[4][2] = { \
1298 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
1299 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
1300 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
1301 { NULL, NULL }, \
1302 }; \
1303 return do_dual_acc(s, a, fns[a->size][a->x]); \
1304 }
1305
1306 DO_DUAL_ACC(VMLADAV_S, vmladavs)
1307 DO_DUAL_ACC(VMLSDAV, vmlsdav)
1308
1309 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
1310 {
1311 static MVEGenDualAccOpFn * const fns[4][2] = {
1312 { gen_helper_mve_vmladavub, NULL },
1313 { gen_helper_mve_vmladavuh, NULL },
1314 { gen_helper_mve_vmladavuw, NULL },
1315 { NULL, NULL },
1316 };
1317 return do_dual_acc(s, a, fns[a->size][a->x]);
1318 }
1319
1320 static void gen_vpst(DisasContext *s, uint32_t mask)
1321 {
1322 /*
1323 * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1324 * being adjacent fields in the register.
1325 *
1326 * Updating the masks is not predicated, but it is subject to beat-wise
1327 * execution, and the mask is updated on the odd-numbered beats.
1328 * So if PSR.ECI says we should skip beat 1, we mustn't update the
1329 * 01 mask field.
1330 */
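/*
 * Concretely, with no beats skipped a 4-bit mask value m is deposited as
 * (m | (m << 4)) starting at the MASK01 field, i.e. both MASK01 and
 * MASK23 become m; if PSR.ECI says beat 1 has already executed, only the
 * MASK23 field is overwritten with m.
 */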
1331 TCGv_i32 vpr = load_cpu_field(v7m.vpr);
1332 switch (s->eci) {
1333 case ECI_NONE:
1334 case ECI_A0:
1335 /* Update both 01 and 23 fields */
1336 tcg_gen_deposit_i32(vpr, vpr,
1337 tcg_constant_i32(mask | (mask << 4)),
1338 R_V7M_VPR_MASK01_SHIFT,
1339 R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
1340 break;
1341 case ECI_A0A1:
1342 case ECI_A0A1A2:
1343 case ECI_A0A1A2B0:
1344 /* Update only the 23 mask field */
1345 tcg_gen_deposit_i32(vpr, vpr,
1346 tcg_constant_i32(mask),
1347 R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
1348 break;
1349 default:
1350 g_assert_not_reached();
1351 }
1352 store_cpu_field(vpr, v7m.vpr);
1353 }
1354
1355 static bool trans_VPST(DisasContext *s, arg_VPST *a)
1356 {
1357 /* mask == 0 is a "related encoding" */
1358 if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
1359 return false;
1360 }
1361 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1362 return true;
1363 }
1364 gen_vpst(s, a->mask);
1365 mve_update_and_store_eci(s);
1366 return true;
1367 }
1368
1369 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
1370 {
1371 /*
1372 * Invert the predicate in VPR.P0. We have to call out to
1373 * a helper because this insn itself is beatwise and can
1374 * be predicated.
1375 */
1376 if (!dc_isar_feature(aa32_mve, s)) {
1377 return false;
1378 }
1379 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1380 return true;
1381 }
1382
1383 gen_helper_mve_vpnot(cpu_env);
1384 /* This insn updates predication bits */
1385 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1386 mve_update_eci(s);
1387 return true;
1388 }
1389
1390 static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
1391 {
1392 /* VADDV: vector add across vector */
1393 static MVEGenVADDVFn * const fns[4][2] = {
1394 { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
1395 { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
1396 { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
1397 { NULL, NULL }
1398 };
1399 TCGv_ptr qm;
1400 TCGv_i32 rda_i, rda_o;
1401
1402 if (!dc_isar_feature(aa32_mve, s) ||
1403 a->size == 3) {
1404 return false;
1405 }
1406 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1407 return true;
1408 }
1409
1410 /*
1411 * This insn is subject to beat-wise execution. Partial execution
1412 * of an A=0 (no-accumulate) insn which does not execute the first
1413 * beat must start with the current value of Rda, not zero.
1414 */
1415 if (a->a || mve_skip_first_beat(s)) {
1416 /* Accumulate input from Rda */
1417 rda_o = rda_i = load_reg(s, a->rda);
1418 } else {
1419 /* Accumulate starting at zero */
1420 rda_i = tcg_constant_i32(0);
1421 rda_o = tcg_temp_new_i32();
1422 }
1423
1424 qm = mve_qreg_ptr(a->qm);
1425 fns[a->size][a->u](rda_o, cpu_env, qm, rda_i);
1426 store_reg(s, a->rda, rda_o);
1427
1428 mve_update_eci(s);
1429 return true;
1430 }
1431
1432 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
1433 {
1434 /*
1435 * Vector Add Long Across Vector: accumulate the 32-bit
1436 * elements of the vector into a 64-bit result stored in
1437 * a pair of general-purpose registers.
1438 * No need to check Qm's bank: it is only 3 bits in decode.
1439 */
1440 TCGv_ptr qm;
1441 TCGv_i64 rda_i, rda_o;
1442 TCGv_i32 rdalo, rdahi;
1443
1444 if (!dc_isar_feature(aa32_mve, s)) {
1445 return false;
1446 }
1447 /*
1448 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1449 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1450 */
1451 if (a->rdahi == 13 || a->rdahi == 15) {
1452 return false;
1453 }
1454 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1455 return true;
1456 }
1457
1458 /*
1459 * This insn is subject to beat-wise execution. Partial execution
1460 * of an A=0 (no-accumulate) insn which does not execute the first
1461 * beat must start with the current value of RdaHi:RdaLo, not zero.
1462 */
1463 rda_o = tcg_temp_new_i64();
1464 if (a->a || mve_skip_first_beat(s)) {
1465 /* Accumulate input from RdaHi:RdaLo */
1466 rda_i = rda_o;
1467 rdalo = load_reg(s, a->rdalo);
1468 rdahi = load_reg(s, a->rdahi);
1469 tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
1470 } else {
1471 /* Accumulate starting at zero */
1472 rda_i = tcg_constant_i64(0);
1473 }
1474
1475 qm = mve_qreg_ptr(a->qm);
1476 if (a->u) {
1477 gen_helper_mve_vaddlv_u(rda_o, cpu_env, qm, rda_i);
1478 } else {
1479 gen_helper_mve_vaddlv_s(rda_o, cpu_env, qm, rda_i);
1480 }
1481
1482 rdalo = tcg_temp_new_i32();
1483 rdahi = tcg_temp_new_i32();
1484 tcg_gen_extrl_i64_i32(rdalo, rda_o);
1485 tcg_gen_extrh_i64_i32(rdahi, rda_o);
1486 store_reg(s, a->rdalo, rdalo);
1487 store_reg(s, a->rdahi, rdahi);
1488 mve_update_eci(s);
1489 return true;
1490 }
1491
1492 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
1493 GVecGen2iFn *vecfn)
1494 {
1495 TCGv_ptr qd;
1496 uint64_t imm;
1497
1498 if (!dc_isar_feature(aa32_mve, s) ||
1499 !mve_check_qreg_bank(s, a->qd) ||
1500 !fn) {
1501 return false;
1502 }
1503 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1504 return true;
1505 }
1506
1507 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1508
1509 if (vecfn && mve_no_predication(s)) {
1510 vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
1511 imm, 16, 16);
1512 } else {
1513 qd = mve_qreg_ptr(a->qd);
1514 fn(cpu_env, qd, tcg_constant_i64(imm));
1515 }
1516 mve_update_eci(s);
1517 return true;
1518 }
1519
1520 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
1521 int64_t c, uint32_t oprsz, uint32_t maxsz)
1522 {
1523 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
1524 }
1525
1526 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
1527 {
1528 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1529 MVEGenOneOpImmFn *fn;
1530 GVecGen2iFn *vecfn;
1531
1532 if ((a->cmode & 1) && a->cmode < 12) {
1533 if (a->op) {
1534 /*
1535 * For op=1, the immediate will be inverted by asimd_imm_const(),
1536 * so the VBIC becomes a logical AND operation.
1537 */
1538 fn = gen_helper_mve_vandi;
1539 vecfn = tcg_gen_gvec_andi;
1540 } else {
1541 fn = gen_helper_mve_vorri;
1542 vecfn = tcg_gen_gvec_ori;
1543 }
1544 } else {
1545 /* There is one unallocated cmode/op combination in this space */
1546 if (a->cmode == 15 && a->op == 1) {
1547 return false;
1548 }
1549 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1550 fn = gen_helper_mve_vmovi;
1551 vecfn = gen_gvec_vmovi;
1552 }
1553 return do_1imm(s, a, fn, vecfn);
1554 }
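/*
 * Decode sketch for trans_Vimm_1r() above, assuming the usual AdvSIMD
 * immediate expansion performed by asimd_imm_const(): an odd cmode below
 * 12 selects the ORR/BIC forms, e.g. cmode 0b0001 with op=0 ORs each
 * 32-bit element with the zero-extended imm8, and op=1 is the BIC form,
 * implemented as an AND with the inverted immediate. Every other
 * cmode/op combination (apart from the unallocated cmode=15, op=1 case)
 * is a plain VMOV/VMVN immediate handled by gen_helper_mve_vmovi.
 */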
1555
1556 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1557 bool negateshift, GVecGen2iFn vecfn)
1558 {
1559 TCGv_ptr qd, qm;
1560 int shift = a->shift;
1561
1562 if (!dc_isar_feature(aa32_mve, s) ||
1563 !mve_check_qreg_bank(s, a->qd | a->qm) ||
1564 !fn) {
1565 return false;
1566 }
1567 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1568 return true;
1569 }
1570
1571 /*
1572 * When we handle a right shift insn using a left-shift helper
1573 * which permits a negative shift count to indicate a right-shift,
1574 * we must negate the shift count.
1575 */
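/*
 * For example, the immediate right shift VSHRI_S is implemented with the
 * vshli_s left-shift helper, so a shift count of 3 is passed to the
 * helper as -3 here; do_gvec_shri_s() further down undoes the negation
 * for the unpredicated inline gvec path.
 */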
1576 if (negateshift) {
1577 shift = -shift;
1578 }
1579
1580 if (vecfn && mve_no_predication(s)) {
1581 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
1582 shift, 16, 16);
1583 } else {
1584 qd = mve_qreg_ptr(a->qd);
1585 qm = mve_qreg_ptr(a->qm);
1586 fn(cpu_env, qd, qm, tcg_constant_i32(shift));
1587 }
1588 mve_update_eci(s);
1589 return true;
1590 }
1591
1592 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1593 bool negateshift)
1594 {
1595 return do_2shift_vec(s, a, fn, negateshift, NULL);
1596 }
1597
1598 #define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN) \
1599 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1600 { \
1601 static MVEGenTwoOpShiftFn * const fns[] = { \
1602 gen_helper_mve_##FN##b, \
1603 gen_helper_mve_##FN##h, \
1604 gen_helper_mve_##FN##w, \
1605 NULL, \
1606 }; \
1607 return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN); \
1608 }
1609
1610 #define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
1611 DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
1612
1613 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
1614 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1615 {
1616 /*
1617 * We get here with a negated shift count, and we must handle
1618 * shifts by the element size, which tcg_gen_gvec_sari() does not do.
1619 */
1620 shift = -shift;
1621 if (shift == (8 << vece)) {
1622 shift--;
1623 }
1624 tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
1625 }
1626
1627 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
1628 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1629 {
1630 /*
1631 * We get here with a negated shift count, and we must handle
1632 * shifts by the element size, which tcg_gen_gvec_shri() does not do.
1633 */
1634 shift = -shift;
1635 if (shift == (8 << vece)) {
1636 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
1637 } else {
1638 tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
1639 }
1640 }
1641
1642 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
1643 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
1644 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
1645 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
1646 /* These right shifts use a left-shift helper with negated shift count */
1647 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
1648 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
1649 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
1650 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
1651
1652 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
1653 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
1654
1655 #define DO_2SHIFT_FP(INSN, FN) \
1656 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1657 { \
1658 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1659 return false; \
1660 } \
1661 return do_2shift(s, a, gen_helper_mve_##FN, false); \
1662 }
1663
1664 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
1665 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
1666 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
1667 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
1668 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
1669 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
1670 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
1671 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
1672
1673 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
1674 MVEGenTwoOpShiftFn *fn)
1675 {
1676 TCGv_ptr qda;
1677 TCGv_i32 rm;
1678
1679 if (!dc_isar_feature(aa32_mve, s) ||
1680 !mve_check_qreg_bank(s, a->qda) ||
1681 a->rm == 13 || a->rm == 15 || !fn) {
1682 /* Rm cases are UNPREDICTABLE */
1683 return false;
1684 }
1685 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1686 return true;
1687 }
1688
1689 qda = mve_qreg_ptr(a->qda);
1690 rm = load_reg(s, a->rm);
1691 fn(cpu_env, qda, qda, rm);
1692 mve_update_eci(s);
1693 return true;
1694 }
1695
1696 #define DO_2SHIFT_SCALAR(INSN, FN) \
1697 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
1698 { \
1699 static MVEGenTwoOpShiftFn * const fns[] = { \
1700 gen_helper_mve_##FN##b, \
1701 gen_helper_mve_##FN##h, \
1702 gen_helper_mve_##FN##w, \
1703 NULL, \
1704 }; \
1705 return do_2shift_scalar(s, a, fns[a->size]); \
1706 }
1707
1708 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
1709 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
1710 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
1711 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
1712 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
1713 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
1714 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
1715 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
1716
1717 #define DO_VSHLL(INSN, FN) \
1718 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1719 { \
1720 static MVEGenTwoOpShiftFn * const fns[] = { \
1721 gen_helper_mve_##FN##b, \
1722 gen_helper_mve_##FN##h, \
1723 }; \
1724 return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
1725 }
1726
1727 /*
1728 * For the VSHLL vector helpers, the vece is the size of the input
1729 * (ie MO_8 or MO_16); the helpers want to work in the output size.
1730 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
1731 */
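/*
 * For example, a bottom-half signed VSHLL.S8 with shift 0 (i.e. VMOVL)
 * reaches do_gvec_vshllbs() on the unpredicated vector path with
 * vece == MO_8: each 16-bit output lane is shifted left by 8 to put the
 * input byte in the top half, then arithmetic-shifted right by 8 - 0,
 * sign-extending the byte and applying the (zero) net left shift.
 */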
1732 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
1733 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1734 {
1735 unsigned ovece = vece + 1;
1736 unsigned ibits = vece == MO_8 ? 8 : 16;
1737 tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
1738 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1739 }
1740
1741 static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
1742 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1743 {
1744 unsigned ovece = vece + 1;
1745 tcg_gen_gvec_andi(ovece, dofs, aofs,
1746 ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
1747 tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
1748 }
1749
1750 static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
1751 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1752 {
1753 unsigned ovece = vece + 1;
1754 unsigned ibits = vece == MO_8 ? 8 : 16;
1755 if (shift == 0) {
1756 tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
1757 } else {
1758 tcg_gen_gvec_andi(ovece, dofs, aofs,
1759 ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1760 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1761 }
1762 }
1763
1764 static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
1765 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1766 {
1767 unsigned ovece = vece + 1;
1768 unsigned ibits = vece == MO_8 ? 8 : 16;
1769 if (shift == 0) {
1770 tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
1771 } else {
1772 tcg_gen_gvec_andi(ovece, dofs, aofs,
1773 ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1774 tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1775 }
1776 }
1777
1778 DO_VSHLL(VSHLL_BS, vshllbs)
1779 DO_VSHLL(VSHLL_BU, vshllbu)
1780 DO_VSHLL(VSHLL_TS, vshllts)
1781 DO_VSHLL(VSHLL_TU, vshlltu)
1782
1783 #define DO_2SHIFT_N(INSN, FN) \
1784 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1785 { \
1786 static MVEGenTwoOpShiftFn * const fns[] = { \
1787 gen_helper_mve_##FN##b, \
1788 gen_helper_mve_##FN##h, \
1789 }; \
1790 return do_2shift(s, a, fns[a->size], false); \
1791 }
1792
1793 DO_2SHIFT_N(VSHRNB, vshrnb)
1794 DO_2SHIFT_N(VSHRNT, vshrnt)
1795 DO_2SHIFT_N(VRSHRNB, vrshrnb)
1796 DO_2SHIFT_N(VRSHRNT, vrshrnt)
1797 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
1798 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
1799 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
1800 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
1801 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
1802 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
1803 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
1804 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
1805 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
1806 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
1807 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
1808 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
1809
1810 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
1811 {
1812 /*
1813 * Whole Vector Left Shift with Carry. The carry is taken
1814 * from a general purpose register and written back there.
1815 * An imm of 0 means "shift by 32".
1816 */
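/*
 * Roughly, VSHLC Qd, Rdm, #n treats Qd as a single 128-bit value: it is
 * shifted left by n, the vacated low bits are filled from the low n bits
 * of Rdm, and the n bits shifted out of the top are written back to Rdm.
 * The gen_helper_mve_vshlc() call below performs this beatwise.
 */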
1817 TCGv_ptr qd;
1818 TCGv_i32 rdm;
1819
1820 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1821 return false;
1822 }
1823 if (a->rdm == 13 || a->rdm == 15) {
1824 /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1825 return false;
1826 }
1827 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1828 return true;
1829 }
1830
1831 qd = mve_qreg_ptr(a->qd);
1832 rdm = load_reg(s, a->rdm);
1833 gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
1834 store_reg(s, a->rdm, rdm);
1835 mve_update_eci(s);
1836 return true;
1837 }
1838
1839 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
1840 {
1841 TCGv_ptr qd;
1842 TCGv_i32 rn;
1843
1844 /*
1845 * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1846 * This fills the vector with elements of successively increasing
1847 * or decreasing values, starting from Rn.
1848 */
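/*
 * For example (a rough sketch), VIDUP.U16 Qd, Rn, #2 with Rn == 10
 * writes the element values 10, 12, 14, ... 24 into Qd and then stores
 * the next value in the sequence (10 + 8 * 2 == 26) back to Rn; VDDUP
 * is the same but counts downwards.
 */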
1849 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1850 return false;
1851 }
1852 if (a->size == MO_64) {
1853 /* size 0b11 is another encoding */
1854 return false;
1855 }
1856 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1857 return true;
1858 }
1859
1860 qd = mve_qreg_ptr(a->qd);
1861 rn = load_reg(s, a->rn);
1862 fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
1863 store_reg(s, a->rn, rn);
1864 mve_update_eci(s);
1865 return true;
1866 }
1867
1868 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
1869 {
1870 TCGv_ptr qd;
1871 TCGv_i32 rn, rm;
1872
1873 /*
1874 * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
1875 * This fills the vector with elements of successively increasing
1876 * or decreasing values, starting from Rn. Rm specifies a point where
1877 * the count wraps back around to 0. The updated offset is written back
1878 * to Rn.
1879 */
1880 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1881 return false;
1882 }
1883 if (!fn || a->rm == 13 || a->rm == 15) {
1884 /*
1885 * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1886 * Rm == 15 is the VIDUP, VDDUP encoding (no wrap).
1887 */
1888 return false;
1889 }
1890 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1891 return true;
1892 }
1893
1894 qd = mve_qreg_ptr(a->qd);
1895 rn = load_reg(s, a->rn);
1896 rm = load_reg(s, a->rm);
1897 fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
1898 store_reg(s, a->rn, rn);
1899 mve_update_eci(s);
1900 return true;
1901 }
1902
1903 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
1904 {
1905 static MVEGenVIDUPFn * const fns[] = {
1906 gen_helper_mve_vidupb,
1907 gen_helper_mve_viduph,
1908 gen_helper_mve_vidupw,
1909 NULL,
1910 };
1911 return do_vidup(s, a, fns[a->size]);
1912 }
1913
1914 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
1915 {
1916 static MVEGenVIDUPFn * const fns[] = {
1917 gen_helper_mve_vidupb,
1918 gen_helper_mve_viduph,
1919 gen_helper_mve_vidupw,
1920 NULL,
1921 };
1922 /* VDDUP is just like VIDUP but with a negative immediate */
1923 a->imm = -a->imm;
1924 return do_vidup(s, a, fns[a->size]);
1925 }
1926
1927 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
1928 {
1929 static MVEGenVIWDUPFn * const fns[] = {
1930 gen_helper_mve_viwdupb,
1931 gen_helper_mve_viwduph,
1932 gen_helper_mve_viwdupw,
1933 NULL,
1934 };
1935 return do_viwdup(s, a, fns[a->size]);
1936 }
1937
1938 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
1939 {
1940 static MVEGenVIWDUPFn * const fns[] = {
1941 gen_helper_mve_vdwdupb,
1942 gen_helper_mve_vdwduph,
1943 gen_helper_mve_vdwdupw,
1944 NULL,
1945 };
1946 return do_viwdup(s, a, fns[a->size]);
1947 }
1948
1949 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
1950 {
1951 TCGv_ptr qn, qm;
1952
1953 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1954 !fn) {
1955 return false;
1956 }
1957 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1958 return true;
1959 }
1960
1961 qn = mve_qreg_ptr(a->qn);
1962 qm = mve_qreg_ptr(a->qm);
1963 fn(cpu_env, qn, qm);
1964 if (a->mask) {
1965 /* VPT */
1966 gen_vpst(s, a->mask);
1967 }
1968 /* This insn updates predication bits */
1969 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1970 mve_update_eci(s);
1971 return true;
1972 }
1973
1974 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
1975 MVEGenScalarCmpFn *fn)
1976 {
1977 TCGv_ptr qn;
1978 TCGv_i32 rm;
1979
1980 if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
1981 return false;
1982 }
1983 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1984 return true;
1985 }
1986
1987 qn = mve_qreg_ptr(a->qn);
1988 if (a->rm == 15) {
1989 /* Encoding Rm=0b1111 means "constant zero" */
1990 rm = tcg_constant_i32(0);
1991 } else {
1992 rm = load_reg(s, a->rm);
1993 }
1994 fn(cpu_env, qn, rm);
1995 if (a->mask) {
1996 /* VPT */
1997 gen_vpst(s, a->mask);
1998 }
1999 /* This insn updates predication bits */
2000 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2001 mve_update_eci(s);
2002 return true;
2003 }
2004
2005 #define DO_VCMP(INSN, FN) \
2006 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2007 { \
2008 static MVEGenCmpFn * const fns[] = { \
2009 gen_helper_mve_##FN##b, \
2010 gen_helper_mve_##FN##h, \
2011 gen_helper_mve_##FN##w, \
2012 NULL, \
2013 }; \
2014 return do_vcmp(s, a, fns[a->size]); \
2015 } \
2016 static bool trans_##INSN##_scalar(DisasContext *s, \
2017 arg_vcmp_scalar *a) \
2018 { \
2019 static MVEGenScalarCmpFn * const fns[] = { \
2020 gen_helper_mve_##FN##_scalarb, \
2021 gen_helper_mve_##FN##_scalarh, \
2022 gen_helper_mve_##FN##_scalarw, \
2023 NULL, \
2024 }; \
2025 return do_vcmp_scalar(s, a, fns[a->size]); \
2026 }
2027
2028 DO_VCMP(VCMPEQ, vcmpeq)
2029 DO_VCMP(VCMPNE, vcmpne)
2030 DO_VCMP(VCMPCS, vcmpcs)
2031 DO_VCMP(VCMPHI, vcmphi)
2032 DO_VCMP(VCMPGE, vcmpge)
2033 DO_VCMP(VCMPLT, vcmplt)
2034 DO_VCMP(VCMPGT, vcmpgt)
2035 DO_VCMP(VCMPLE, vcmple)
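/*
 * Conceptual sketch of what the vcmp helpers above compute (unpredicated,
 * 32-bit elements, VCMPGT shown): each element comparison sets or clears
 * that element's four per-byte predicate bits in VPR.P0, which subsequent
 * predicated instructions (or the VPT mask set by gen_vpst) consume. The
 * scalar forms compare every element against one general-purpose value,
 * with Rm == 15 standing for constant zero. vcmpgt_model() is illustrative.
 */
#include <stdint.h>

static uint16_t vcmpgt_model(const int32_t qn[4], const int32_t qm[4])
{
    uint16_t p0 = 0;
    for (int e = 0; e < 4; e++) {
        if (qn[e] > qm[e]) {
            p0 |= 0xf << (e * 4);   /* one predicate bit per byte lane */
        }
    }
    return p0;                      /* would be written into VPR.P0 */
}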
2036
2037 #define DO_VCMP_FP(INSN, FN) \
2038 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2039 { \
2040 static MVEGenCmpFn * const fns[] = { \
2041 NULL, \
2042 gen_helper_mve_##FN##h, \
2043 gen_helper_mve_##FN##s, \
2044 NULL, \
2045 }; \
2046 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2047 return false; \
2048 } \
2049 return do_vcmp(s, a, fns[a->size]); \
2050 } \
2051 static bool trans_##INSN##_scalar(DisasContext *s, \
2052 arg_vcmp_scalar *a) \
2053 { \
2054 static MVEGenScalarCmpFn * const fns[] = { \
2055 NULL, \
2056 gen_helper_mve_##FN##_scalarh, \
2057 gen_helper_mve_##FN##_scalars, \
2058 NULL, \
2059 }; \
2060 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2061 return false; \
2062 } \
2063 return do_vcmp_scalar(s, a, fns[a->size]); \
2064 }
2065
2066 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
2067 DO_VCMP_FP(VCMPNE_fp, vfcmpne)
2068 DO_VCMP_FP(VCMPGE_fp, vfcmpge)
2069 DO_VCMP_FP(VCMPLT_fp, vfcmplt)
2070 DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
2071 DO_VCMP_FP(VCMPLE_fp, vfcmple)
2072
2073 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
2074 {
2075 /*
2076 * MIN/MAX operations across a vector: compute the min or
2077 * max of the initial value in a general purpose register
2078 * and all the elements in the vector, and store it back
2079 * into the general purpose register.
2080 */
2081 TCGv_ptr qm;
2082 TCGv_i32 rda;
2083
2084 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
2085 !fn || a->rda == 13 || a->rda == 15) {
2086 /* Rda cases are UNPREDICTABLE */
2087 return false;
2088 }
2089 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2090 return true;
2091 }
2092
2093 qm = mve_qreg_ptr(a->qm);
2094 rda = load_reg(s, a->rda);
2095 fn(rda, cpu_env, qm, rda);
2096 store_reg(s, a->rda, rda);
2097 mve_update_eci(s);
2098 return true;
2099 }
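/*
 * A sketch of the across-vector reduction described above, for the signed
 * 32-bit VMAXV case with predication ignored: the general-purpose value is
 * folded together with every element and written back. VMINV takes the
 * minimum instead, and the VMAXAV/VMINAV forms use the absolute values of
 * the elements. vmaxv_model() is an illustrative name, not the QEMU helper.
 */
#include <stdint.h>

static int32_t vmaxv_model(int32_t rda, const int32_t qm[4])
{
    for (int e = 0; e < 4; e++) {
        if (qm[e] > rda) {
            rda = qm[e];
        }
    }
    return rda;                     /* stored back into the Rda register */
}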
2100
2101 #define DO_VMAXV(INSN, FN) \
2102 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2103 { \
2104 static MVEGenVADDVFn * const fns[] = { \
2105 gen_helper_mve_##FN##b, \
2106 gen_helper_mve_##FN##h, \
2107 gen_helper_mve_##FN##w, \
2108 NULL, \
2109 }; \
2110 return do_vmaxv(s, a, fns[a->size]); \
2111 }
2112
2113 DO_VMAXV(VMAXV_S, vmaxvs)
2114 DO_VMAXV(VMAXV_U, vmaxvu)
2115 DO_VMAXV(VMAXAV, vmaxav)
2116 DO_VMAXV(VMINV_S, vminvs)
2117 DO_VMAXV(VMINV_U, vminvu)
2118 DO_VMAXV(VMINAV, vminav)
2119
2120 #define DO_VMAXV_FP(INSN, FN) \
2121 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2122 { \
2123 static MVEGenVADDVFn * const fns[] = { \
2124 NULL, \
2125 gen_helper_mve_##FN##h, \
2126 gen_helper_mve_##FN##s, \
2127 NULL, \
2128 }; \
2129 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2130 return false; \
2131 } \
2132 return do_vmaxv(s, a, fns[a->size]); \
2133 }
2134
2135 DO_VMAXV_FP(VMAXNMV, vmaxnmv)
2136 DO_VMAXV_FP(VMINNMV, vminnmv)
2137 DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
2138 DO_VMAXV_FP(VMINNMAV, vminnmav)
2139
2140 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
2141 {
2142 /* Absolute difference accumulated across vector */
2143 TCGv_ptr qn, qm;
2144 TCGv_i32 rda;
2145
2146 if (!dc_isar_feature(aa32_mve, s) ||
2147 !mve_check_qreg_bank(s, a->qm | a->qn) ||
2148 !fn || a->rda == 13 || a->rda == 15) {
2149 /* Rda cases are UNPREDICTABLE */
2150 return false;
2151 }
2152 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2153 return true;
2154 }
2155
2156 qm = mve_qreg_ptr(a->qm);
2157 qn = mve_qreg_ptr(a->qn);
2158 rda = load_reg(s, a->rda);
2159 fn(rda, cpu_env, qn, qm, rda);
2160 store_reg(s, a->rda, rda);
2161 mve_update_eci(s);
2162 return true;
2163 }
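/*
 * A sketch of the VABAV accumulation described above (signed 32-bit
 * elements, predication ignored): the absolute difference of each element
 * pair is added into the general-purpose accumulator. vabav_model() is an
 * illustrative name, not the QEMU helper.
 */
#include <stdint.h>

static uint32_t vabav_model(uint32_t rda, const int32_t qn[4],
                            const int32_t qm[4])
{
    for (int e = 0; e < 4; e++) {
        int64_t diff = (int64_t)qn[e] - qm[e];
        rda += (uint32_t)(diff < 0 ? -diff : diff);
    }
    return rda;                     /* stored back into the Rda register */
}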
2164
2165 #define DO_VABAV(INSN, FN) \
2166 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \
2167 { \
2168 static MVEGenVABAVFn * const fns[] = { \
2169 gen_helper_mve_##FN##b, \
2170 gen_helper_mve_##FN##h, \
2171 gen_helper_mve_##FN##w, \
2172 NULL, \
2173 }; \
2174 return do_vabav(s, a, fns[a->size]); \
2175 }
2176
2177 DO_VABAV(VABAV_S, vabavs)
2178 DO_VABAV(VABAV_U, vabavu)
2179
2180 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2181 {
2182 /*
2183 * VMOV two 32-bit vector lanes to two general-purpose registers.
2184 * This insn is not predicated but it is subject to beat-wise
2185 * execution if it is not in an IT block. For us this means
2186 * only that if PSR.ECI says we should not be executing the beat
2187 * corresponding to the lane of the vector register being accessed
2188 * then we should skip performing the move, and that we need to do
2189 * the usual check for bad ECI state and advance of ECI state.
2190 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2191 */
2192 TCGv_i32 tmp;
2193 int vd;
2194
2195 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2196 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
2197 a->rt == a->rt2) {
2198 /* Rt/Rt2 cases are UNPREDICTABLE */
2199 return false;
2200 }
2201 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2202 return true;
2203 }
2204
2205 /* Convert Qreg index to Dreg for read_neon_element32() etc */
2206 vd = a->qd * 2;
2207
2208 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2209 tmp = tcg_temp_new_i32();
2210 read_neon_element32(tmp, vd, a->idx, MO_32);
2211 store_reg(s, a->rt, tmp);
2212 }
2213 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2214 tmp = tcg_temp_new_i32();
2215 read_neon_element32(tmp, vd + 1, a->idx, MO_32);
2216 store_reg(s, a->rt2, tmp);
2217 }
2218
2219 mve_update_and_store_eci(s);
2220 return true;
2221 }
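/*
 * A sketch of the lane mapping used above, with beat skipping and ECI
 * bookkeeping omitted: vd = qd * 2 converts the Q-register index into the
 * D-register numbering used by read_neon_element32(), so for idx in {0, 1}
 * the insn copies word lanes idx and idx + 2 of Qd into Rt and Rt2. The
 * "from_2gp" form below writes the same two lanes in the other direction.
 * vmov_to_2gp_model() and the q[] layout are illustrative only.
 */
#include <stdint.h>

static void vmov_to_2gp_model(const uint32_t q[4], unsigned idx,
                              uint32_t *rt, uint32_t *rt2)
{
    *rt  = q[idx];          /* lane idx of D-reg (qd * 2)     */
    *rt2 = q[idx + 2];      /* lane idx of D-reg (qd * 2 + 1) */
}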
2222
2223 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2224 {
2225 /*
2226 * VMOV two general-purpose registers to two 32-bit vector lanes.
2227 * This insn is not predicated but it is subject to beat-wise
2228 * execution if it is not in an IT block. For us this means
2229 * only that if PSR.ECI says we should not be executing the beat
2230 * corresponding to the lane of the vector register being accessed
2231 * then we should skip performing the move, and that we need to do
2232 * the usual check for bad ECI state and advance of ECI state.
2233 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2234 */
2235 TCGv_i32 tmp;
2236 int vd;
2237
2238 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2239 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
2240 /* Rt/Rt2 cases are UNPREDICTABLE */
2241 return false;
2242 }
2243 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2244 return true;
2245 }
2246
2247 /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2248 vd = a->qd * 2;
2249
2250 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2251 tmp = load_reg(s, a->rt);
2252 write_neon_element32(tmp, vd, a->idx, MO_32);
2253 }
2254 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2255 tmp = load_reg(s, a->rt2);
2256 write_neon_element32(tmp, vd + 1, a->idx, MO_32);
2257 }
2258
2259 mve_update_and_store_eci(s);
2260 return true;
2261 }