1 /*
2 * ARM translation: AArch32 Neon instructions
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 * Copyright (c) 2020 Linaro, Ltd.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "qemu/osdep.h"
24 #include "tcg/tcg-op.h"
25 #include "tcg/tcg-op-gvec.h"
26 #include "exec/exec-all.h"
27 #include "translate.h"
28 #include "translate-a32.h"
29
30 /* Include the generated Neon decoder */
31 #include "decode-neon-dp.c.inc"
32 #include "decode-neon-ls.c.inc"
33 #include "decode-neon-shared.c.inc"
34
35 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
36 {
37 TCGv_ptr ret = tcg_temp_new_ptr();
38 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
39 return ret;
40 }
41
42 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
43 {
44 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
45
46 switch (mop) {
47 case MO_UB:
48 tcg_gen_ld8u_i32(var, cpu_env, offset);
49 break;
50 case MO_UW:
51 tcg_gen_ld16u_i32(var, cpu_env, offset);
52 break;
53 case MO_UL:
54 tcg_gen_ld_i32(var, cpu_env, offset);
55 break;
56 default:
57 g_assert_not_reached();
58 }
59 }
60
61 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
62 {
63 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
64
65 switch (mop) {
66 case MO_UB:
67 tcg_gen_ld8u_i64(var, cpu_env, offset);
68 break;
69 case MO_UW:
70 tcg_gen_ld16u_i64(var, cpu_env, offset);
71 break;
72 case MO_UL:
73 tcg_gen_ld32u_i64(var, cpu_env, offset);
74 break;
75 case MO_UQ:
76 tcg_gen_ld_i64(var, cpu_env, offset);
77 break;
78 default:
79 g_assert_not_reached();
80 }
81 }
82
83 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
84 {
85 long offset = neon_element_offset(reg, ele, size);
86
87 switch (size) {
88 case MO_8:
89 tcg_gen_st8_i32(var, cpu_env, offset);
90 break;
91 case MO_16:
92 tcg_gen_st16_i32(var, cpu_env, offset);
93 break;
94 case MO_32:
95 tcg_gen_st_i32(var, cpu_env, offset);
96 break;
97 default:
98 g_assert_not_reached();
99 }
100 }
101
102 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
103 {
104 long offset = neon_element_offset(reg, ele, size);
105
106 switch (size) {
107 case MO_8:
108 tcg_gen_st8_i64(var, cpu_env, offset);
109 break;
110 case MO_16:
111 tcg_gen_st16_i64(var, cpu_env, offset);
112 break;
113 case MO_32:
114 tcg_gen_st32_i64(var, cpu_env, offset);
115 break;
116 case MO_64:
117 tcg_gen_st_i64(var, cpu_env, offset);
118 break;
119 default:
120 g_assert_not_reached();
121 }
122 }
123
124 static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
125 int data, gen_helper_gvec_4 *fn_gvec)
126 {
127 /* UNDEF accesses to D16-D31 if they don't exist. */
128 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
129 return false;
130 }
131
132 /*
133 * UNDEF accesses to odd registers for each set bit of Q.
134 * Q is 0b111 for all-Q-reg insns; otherwise each set bit marks an
135 * operand that is a Q reg when we have mixed Q- and D-reg inputs.
136 */
137 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
138 return false;
139 }
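/*
 * For example: the callers below pass q == 0b111 for a fully Q-form
 * insn, so with vd == 5 (an odd D index, hence not a valid Q register)
 * we get (5 & 1) * 4 == 4, which ANDed with 0b111 is non-zero and the
 * insn UNDEFs.  The indexed forms pass q == 0b110, so an odd vm (the
 * scalar D register) is accepted while an odd vd or vn still UNDEFs.
 */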
140
141 if (!vfp_access_check(s)) {
142 return true;
143 }
144
145 int opr_sz = q ? 16 : 8;
146 tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
147 vfp_reg_offset(1, vn),
148 vfp_reg_offset(1, vm),
149 vfp_reg_offset(1, vd),
150 opr_sz, opr_sz, data, fn_gvec);
151 return true;
152 }
153
154 static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
155 int data, ARMFPStatusFlavour fp_flavour,
156 gen_helper_gvec_4_ptr *fn_gvec_ptr)
157 {
158 /* UNDEF accesses to D16-D31 if they don't exist. */
159 if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
160 return false;
161 }
162
163 /*
164 * UNDEF accesses to odd registers for each set bit of Q.
165 * Q is 0b111 for all-Q-reg insns; otherwise each set bit marks an
166 * operand that is a Q reg when we have mixed Q- and D-reg inputs.
167 */
168 if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
169 return false;
170 }
171
172 if (!vfp_access_check(s)) {
173 return true;
174 }
175
176 int opr_sz = q ? 16 : 8;
177 TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
178
179 tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
180 vfp_reg_offset(1, vn),
181 vfp_reg_offset(1, vm),
182 vfp_reg_offset(1, vd),
183 fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
184 return true;
185 }
186
187 static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
188 {
189 if (!dc_isar_feature(aa32_vcma, s)) {
190 return false;
191 }
192 if (a->size == MO_16) {
193 if (!dc_isar_feature(aa32_fp16_arith, s)) {
194 return false;
195 }
196 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
197 FPST_STD_F16, gen_helper_gvec_fcmlah);
198 }
199 return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
200 FPST_STD, gen_helper_gvec_fcmlas);
201 }
202
203 static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
204 {
205 int opr_sz;
206 TCGv_ptr fpst;
207 gen_helper_gvec_3_ptr *fn_gvec_ptr;
208
209 if (!dc_isar_feature(aa32_vcma, s)
210 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
211 return false;
212 }
213
214 /* UNDEF accesses to D16-D31 if they don't exist. */
215 if (!dc_isar_feature(aa32_simd_r32, s) &&
216 ((a->vd | a->vn | a->vm) & 0x10)) {
217 return false;
218 }
219
220 if ((a->vn | a->vm | a->vd) & a->q) {
221 return false;
222 }
223
224 if (!vfp_access_check(s)) {
225 return true;
226 }
227
228 opr_sz = (1 + a->q) * 8;
229 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
230 fn_gvec_ptr = (a->size == MO_16) ?
231 gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
232 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
233 vfp_reg_offset(1, a->vn),
234 vfp_reg_offset(1, a->vm),
235 fpst, opr_sz, opr_sz, a->rot,
236 fn_gvec_ptr);
237 return true;
238 }
239
240 static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
241 {
242 if (!dc_isar_feature(aa32_dp, s)) {
243 return false;
244 }
245 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
246 gen_helper_gvec_sdot_b);
247 }
248
249 static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
250 {
251 if (!dc_isar_feature(aa32_dp, s)) {
252 return false;
253 }
254 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
255 gen_helper_gvec_udot_b);
256 }
257
258 static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
259 {
260 if (!dc_isar_feature(aa32_i8mm, s)) {
261 return false;
262 }
263 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
264 gen_helper_gvec_usdot_b);
265 }
266
267 static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
268 {
269 if (!dc_isar_feature(aa32_bf16, s)) {
270 return false;
271 }
272 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
273 gen_helper_gvec_bfdot);
274 }
275
276 static bool trans_VFML(DisasContext *s, arg_VFML *a)
277 {
278 int opr_sz;
279
280 if (!dc_isar_feature(aa32_fhm, s)) {
281 return false;
282 }
283
284 /* UNDEF accesses to D16-D31 if they don't exist. */
285 if (!dc_isar_feature(aa32_simd_r32, s) &&
286 (a->vd & 0x10)) {
287 return false;
288 }
289
290 if (a->vd & a->q) {
291 return false;
292 }
293
294 if (!vfp_access_check(s)) {
295 return true;
296 }
297
298 opr_sz = (1 + a->q) * 8;
299 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
300 vfp_reg_offset(a->q, a->vn),
301 vfp_reg_offset(a->q, a->vm),
302 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
303 gen_helper_gvec_fmlal_a32);
304 return true;
305 }
306
307 static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
308 {
309 int data = (a->index << 2) | a->rot;
310
311 if (!dc_isar_feature(aa32_vcma, s)) {
312 return false;
313 }
314 if (a->size == MO_16) {
315 if (!dc_isar_feature(aa32_fp16_arith, s)) {
316 return false;
317 }
318 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
319 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
320 }
321 return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
322 FPST_STD, gen_helper_gvec_fcmlas_idx);
323 }
324
325 static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
326 {
327 if (!dc_isar_feature(aa32_dp, s)) {
328 return false;
329 }
330 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
331 gen_helper_gvec_sdot_idx_b);
332 }
333
334 static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
335 {
336 if (!dc_isar_feature(aa32_dp, s)) {
337 return false;
338 }
339 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
340 gen_helper_gvec_udot_idx_b);
341 }
342
343 static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
344 {
345 if (!dc_isar_feature(aa32_i8mm, s)) {
346 return false;
347 }
348 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
349 gen_helper_gvec_usdot_idx_b);
350 }
351
352 static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
353 {
354 if (!dc_isar_feature(aa32_i8mm, s)) {
355 return false;
356 }
357 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
358 gen_helper_gvec_sudot_idx_b);
359 }
360
361 static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
362 {
363 if (!dc_isar_feature(aa32_bf16, s)) {
364 return false;
365 }
366 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
367 gen_helper_gvec_bfdot_idx);
368 }
369
370 static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
371 {
372 int opr_sz;
373
374 if (!dc_isar_feature(aa32_fhm, s)) {
375 return false;
376 }
377
378 /* UNDEF accesses to D16-D31 if they don't exist. */
379 if (!dc_isar_feature(aa32_simd_r32, s) &&
380 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
381 return false;
382 }
383
384 if (a->vd & a->q) {
385 return false;
386 }
387
388 if (!vfp_access_check(s)) {
389 return true;
390 }
391
392 opr_sz = (1 + a->q) * 8;
393 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
394 vfp_reg_offset(a->q, a->vn),
395 vfp_reg_offset(a->q, a->rm),
396 cpu_env, opr_sz, opr_sz,
397 (a->index << 2) | a->s, /* is_2 == 0 */
398 gen_helper_gvec_fmlal_idx_a32);
399 return true;
400 }
401
402 static struct {
403 int nregs;
404 int interleave;
405 int spacing;
406 } const neon_ls_element_type[11] = {
407 {1, 4, 1},
408 {1, 4, 2},
409 {4, 1, 1},
410 {2, 2, 2},
411 {1, 3, 1},
412 {1, 3, 2},
413 {3, 1, 1},
414 {1, 1, 1},
415 {1, 2, 1},
416 {1, 2, 2},
417 {2, 1, 1}
418 };
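/*
 * Reading the table (a sketch): the index is the A32 "type" field of
 * the VLDn/VSTn multiple-structures encodings.  E.g. itype 0 is
 * {1, 4, 1}: VLD4/VST4 with the elements of d[vd]..d[vd+3] interleaved
 * in memory; itype 7 is {1, 1, 1}: a plain single-register VLD1/VST1.
 * The writeback below advances the base by nregs * interleave * 8 bytes.
 */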
419
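/*
 * Base register writeback for the element load/store insns: rm == 15
 * means no writeback, rm == 13 requests post-increment by the fixed
 * transfer size ("[<Rn>]!"), and any other rm post-indexes by that
 * register ("[<Rn>], <Rm>").
 */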
420 static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
421 int stride)
422 {
423 if (rm != 15) {
424 TCGv_i32 base;
425
426 base = load_reg(s, rn);
427 if (rm == 13) {
428 tcg_gen_addi_i32(base, base, stride);
429 } else {
430 TCGv_i32 index;
431 index = load_reg(s, rm);
432 tcg_gen_add_i32(base, base, index);
433 }
434 store_reg(s, rn, base);
435 }
436 }
437
438 static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
439 {
440 /* Neon load/store multiple structures */
441 int nregs, interleave, spacing, reg, n;
442 MemOp mop, align, endian;
443 int mmu_idx = get_mem_index(s);
444 int size = a->size;
445 TCGv_i64 tmp64;
446 TCGv_i32 addr;
447
448 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
449 return false;
450 }
451
452 /* UNDEF accesses to D16-D31 if they don't exist */
453 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
454 return false;
455 }
456 if (a->itype > 10) {
457 return false;
458 }
459 /* Catch UNDEF cases for bad values of align field */
460 switch (a->itype & 0xc) {
461 case 4:
462 if (a->align >= 2) {
463 return false;
464 }
465 break;
466 case 8:
467 if (a->align == 3) {
468 return false;
469 }
470 break;
471 default:
472 break;
473 }
474 nregs = neon_ls_element_type[a->itype].nregs;
475 interleave = neon_ls_element_type[a->itype].interleave;
476 spacing = neon_ls_element_type[a->itype].spacing;
477 if (size == 3 && (interleave | spacing) != 1) {
478 return false;
479 }
480
481 if (!vfp_access_check(s)) {
482 return true;
483 }
484
485 /* For our purposes, bytes are always little-endian. */
486 endian = s->be_data;
487 if (size == 0) {
488 endian = MO_LE;
489 }
490
491 /* Enforce alignment requested by the instruction */
492 if (a->align) {
493 align = pow2_align(a->align + 2); /* 4 << a->align, i.e. 8, 16 or 32 bytes */
494 } else {
495 align = s->align_mem ? MO_ALIGN : 0;
496 }
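/*
 * E.g. a->align == 1 requests 8-byte (64-bit) alignment, 2 requests
 * 16 bytes and 3 requests 32 bytes.
 */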
497
498 /*
499 * Consecutive little-endian elements from a single register
500 * can be promoted to a larger little-endian operation.
501 */
502 if (interleave == 1 && endian == MO_LE) {
503 /* Retain any natural alignment. */
504 if (align == MO_ALIGN) {
505 align = pow2_align(size);
506 }
507 size = 3;
508 }
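/*
 * For example, a VLD1.16 of a whole D register (interleave == 1) with
 * little-endian data is performed below as a single 64-bit access
 * rather than four 16-bit ones; the result is the same because the
 * elements are consecutive in both the register and memory.
 */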
509
510 tmp64 = tcg_temp_new_i64();
511 addr = tcg_temp_new_i32();
512 load_reg_var(s, addr, a->rn);
513
514 mop = endian | size | align;
515 for (reg = 0; reg < nregs; reg++) {
516 for (n = 0; n < 8 >> size; n++) {
517 int xs;
518 for (xs = 0; xs < interleave; xs++) {
519 int tt = a->vd + reg + spacing * xs;
520
521 if (a->l) {
522 gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
523 neon_store_element64(tt, n, size, tmp64);
524 } else {
525 neon_load_element64(tmp64, tt, n, size);
526 gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
527 }
528 tcg_gen_addi_i32(addr, addr, 1 << size);
529
530 /* Subsequent memory operations inherit alignment */
531 mop &= ~MO_AMASK;
532 }
533 }
534 }
535
536 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
537 return true;
538 }
539
540 static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
541 {
542 /* Neon load single structure to all lanes */
543 int reg, stride, vec_size;
544 int vd = a->vd;
545 int size = a->size;
546 int nregs = a->n + 1;
547 TCGv_i32 addr, tmp;
548 MemOp mop, align;
549
550 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
551 return false;
552 }
553
554 /* UNDEF accesses to D16-D31 if they don't exist */
555 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
556 return false;
557 }
558
559 align = 0;
560 if (size == 3) {
561 if (nregs != 4 || a->a == 0) {
562 return false;
563 }
564 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
565 size = MO_32;
566 align = MO_ALIGN_16;
567 } else if (a->a) {
568 switch (nregs) {
569 case 1:
570 if (size == 0) {
571 return false;
572 }
573 align = MO_ALIGN;
574 break;
575 case 2:
576 align = pow2_align(size + 1);
577 break;
578 case 3:
579 return false;
580 case 4:
581 if (size == 2) {
582 align = pow2_align(3);
583 } else {
584 align = pow2_align(size + 2);
585 }
586 break;
587 default:
588 g_assert_not_reached();
589 }
590 }
591
592 if (!vfp_access_check(s)) {
593 return true;
594 }
595
596 /*
597 * VLD1 to all lanes: T bit indicates how many Dregs to write.
598 * VLD2/3/4 to all lanes: T bit indicates register stride.
599 */
600 stride = a->t ? 2 : 1;
601 vec_size = nregs == 1 ? stride * 8 : 8;
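/*
 * Example: VLD1.8 {d0[], d1[]} has nregs == 1 and t == 1, so stride is
 * 2 and vec_size is 16, and the single loop iteration below duplicates
 * the loaded byte into both d0 and d1.  For VLD2/3/4, t == 1 instead
 * selects the alternate-register stride (d0, d2, ... rather than
 * d0, d1, ...).
 */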
602 mop = size | align;
603 tmp = tcg_temp_new_i32();
604 addr = tcg_temp_new_i32();
605 load_reg_var(s, addr, a->rn);
606 for (reg = 0; reg < nregs; reg++) {
607 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
608 if ((vd & 1) && vec_size == 16) {
609 /*
610 * We cannot write 16 bytes at once because the
611 * destination is unaligned.
612 */
613 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
614 8, 8, tmp);
615 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
616 neon_full_reg_offset(vd), 8, 8);
617 } else {
618 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
619 vec_size, vec_size, tmp);
620 }
621 tcg_gen_addi_i32(addr, addr, 1 << size);
622 vd += stride;
623
624 /* Subsequent memory operations inherit alignment */
625 mop &= ~MO_AMASK;
626 }
627
628 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
629
630 return true;
631 }
632
633 static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
634 {
635 /* Neon load/store single structure to one lane */
636 int reg;
637 int nregs = a->n + 1;
638 int vd = a->vd;
639 TCGv_i32 addr, tmp;
640 MemOp mop;
641
642 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
643 return false;
644 }
645
646 /* UNDEF accesses to D16-D31 if they don't exist */
647 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
648 return false;
649 }
650
651 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
652 switch (nregs) {
653 case 1:
654 if (a->stride != 1) {
655 return false;
656 }
657 if (((a->align & (1 << a->size)) != 0) ||
658 (a->size == 2 && (a->align == 1 || a->align == 2))) {
659 return false;
660 }
661 break;
662 case 2:
663 if (a->size == 2 && (a->align & 2) != 0) {
664 return false;
665 }
666 break;
667 case 3:
668 if (a->align != 0) {
669 return false;
670 }
671 break;
672 case 4:
673 if (a->size == 2 && a->align == 3) {
674 return false;
675 }
676 break;
677 default:
678 g_assert_not_reached();
679 }
680 if ((vd + a->stride * (nregs - 1)) > 31) {
681 /*
682 * Attempts to write off the end of the register file are
683 * UNPREDICTABLE; we choose to UNDEF because otherwise we would
684 * access off the end of the array that holds the register data.
685 */
686 return false;
687 }
688
689 if (!vfp_access_check(s)) {
690 return true;
691 }
692
693 /* Pick up SCTLR settings */
694 mop = finalize_memop(s, a->size);
695
696 if (a->align) {
697 MemOp align_op;
698
699 switch (nregs) {
700 case 1:
701 /* For VLD1, use natural alignment. */
702 align_op = MO_ALIGN;
703 break;
704 case 2:
705 /* For VLD2, use double alignment. */
706 align_op = pow2_align(a->size + 1);
707 break;
708 case 4:
709 if (a->size == MO_32) {
710 /*
711 * For VLD4.32, align = 1 is double alignment, align = 2 is
712 * quad alignment; align = 3 is rejected above.
713 */
714 align_op = pow2_align(a->size + a->align);
715 } else {
716 /* For VLD4.8 and VLD4.16, we want quad alignment. */
717 align_op = pow2_align(a->size + 2);
718 }
719 break;
720 default:
721 /* For VLD3, the alignment field is zero and rejected above. */
722 g_assert_not_reached();
723 }
724
725 mop = (mop & ~MO_AMASK) | align_op;
726 }
727
728 tmp = tcg_temp_new_i32();
729 addr = tcg_temp_new_i32();
730 load_reg_var(s, addr, a->rn);
731
732 for (reg = 0; reg < nregs; reg++) {
733 if (a->l) {
734 gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
735 neon_store_element(vd, a->reg_idx, a->size, tmp);
736 } else { /* Store */
737 neon_load_element(tmp, vd, a->reg_idx, a->size);
738 gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
739 }
740 vd += a->stride;
741 tcg_gen_addi_i32(addr, addr, 1 << a->size);
742
743 /* Subsequent memory operations inherit alignment */
744 mop &= ~MO_AMASK;
745 }
746
747 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
748
749 return true;
750 }
751
752 static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
753 {
754 int vec_size = a->q ? 16 : 8;
755 int rd_ofs = neon_full_reg_offset(a->vd);
756 int rn_ofs = neon_full_reg_offset(a->vn);
757 int rm_ofs = neon_full_reg_offset(a->vm);
758
759 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
760 return false;
761 }
762
763 /* UNDEF accesses to D16-D31 if they don't exist. */
764 if (!dc_isar_feature(aa32_simd_r32, s) &&
765 ((a->vd | a->vn | a->vm) & 0x10)) {
766 return false;
767 }
768
769 if ((a->vn | a->vm | a->vd) & a->q) {
770 return false;
771 }
772
773 if (!vfp_access_check(s)) {
774 return true;
775 }
776
777 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
778 return true;
779 }
780
781 #define DO_3SAME(INSN, FUNC) \
782 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
783 { \
784 return do_3same(s, a, FUNC); \
785 }
786
787 DO_3SAME(VADD, tcg_gen_gvec_add)
788 DO_3SAME(VSUB, tcg_gen_gvec_sub)
789 DO_3SAME(VAND, tcg_gen_gvec_and)
790 DO_3SAME(VBIC, tcg_gen_gvec_andc)
791 DO_3SAME(VORR, tcg_gen_gvec_or)
792 DO_3SAME(VORN, tcg_gen_gvec_orc)
793 DO_3SAME(VEOR, tcg_gen_gvec_xor)
794 DO_3SAME(VSHL_S, gen_gvec_sshl)
795 DO_3SAME(VSHL_U, gen_gvec_ushl)
796 DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
797 DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
798 DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
799 DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
800
801 /* These insns are all gvec_bitsel but with the inputs in various orders. */
802 #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
803 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
804 uint32_t rn_ofs, uint32_t rm_ofs, \
805 uint32_t oprsz, uint32_t maxsz) \
806 { \
807 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
808 } \
809 DO_3SAME(INSN, gen_##INSN##_3s)
810
811 DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
812 DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
813 DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
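/*
 * tcg_gen_gvec_bitsel(vece, d, a, b, c, ...) computes d = (b & a) | (c & ~a),
 * with the first source acting as the select mask.  Hence VBSL selects
 * using the destination, VBIT inserts rn bits where rm is 1, and VBIF
 * inserts rn bits where rm is 0; only the operand order differs above.
 */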
814
815 #define DO_3SAME_NO_SZ_3(INSN, FUNC) \
816 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
817 { \
818 if (a->size == 3) { \
819 return false; \
820 } \
821 return do_3same(s, a, FUNC); \
822 }
823
824 DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
825 DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
826 DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
827 DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
828 DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
829 DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
830 DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
831 DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
832 DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
833 DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
834 DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
835 DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
836
837 #define DO_3SAME_CMP(INSN, COND) \
838 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
839 uint32_t rn_ofs, uint32_t rm_ofs, \
840 uint32_t oprsz, uint32_t maxsz) \
841 { \
842 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
843 } \
844 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
845
846 DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
847 DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
848 DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
849 DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
850 DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
851
852 #define WRAP_OOL_FN(WRAPNAME, FUNC) \
853 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
854 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
855 { \
856 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
857 }
858
859 WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
860
861 static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
862 {
863 if (a->size != 0) {
864 return false;
865 }
866 return do_3same(s, a, gen_VMUL_p_3s);
867 }
868
869 #define DO_VQRDMLAH(INSN, FUNC) \
870 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
871 { \
872 if (!dc_isar_feature(aa32_rdm, s)) { \
873 return false; \
874 } \
875 if (a->size != 1 && a->size != 2) { \
876 return false; \
877 } \
878 return do_3same(s, a, FUNC); \
879 }
880
881 DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
882 DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
883
884 #define DO_SHA1(NAME, FUNC) \
885 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
886 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
887 { \
888 if (!dc_isar_feature(aa32_sha1, s)) { \
889 return false; \
890 } \
891 return do_3same(s, a, gen_##NAME##_3s); \
892 }
893
894 DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
895 DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
896 DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
897 DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
898
899 #define DO_SHA2(NAME, FUNC) \
900 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
901 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
902 { \
903 if (!dc_isar_feature(aa32_sha2, s)) { \
904 return false; \
905 } \
906 return do_3same(s, a, gen_##NAME##_3s); \
907 }
908
909 DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
910 DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
911 DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
912
913 #define DO_3SAME_64(INSN, FUNC) \
914 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
915 uint32_t rn_ofs, uint32_t rm_ofs, \
916 uint32_t oprsz, uint32_t maxsz) \
917 { \
918 static const GVecGen3 op = { .fni8 = FUNC }; \
919 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
920 } \
921 DO_3SAME(INSN, gen_##INSN##_3s)
922
923 #define DO_3SAME_64_ENV(INSN, FUNC) \
924 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
925 { \
926 FUNC(d, cpu_env, n, m); \
927 } \
928 DO_3SAME_64(INSN, gen_##INSN##_elt)
929
930 DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
931 DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
932 DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
933 DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
934 DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
935 DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
936
937 #define DO_3SAME_32(INSN, FUNC) \
938 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
939 uint32_t rn_ofs, uint32_t rm_ofs, \
940 uint32_t oprsz, uint32_t maxsz) \
941 { \
942 static const GVecGen3 ops[4] = { \
943 { .fni4 = gen_helper_neon_##FUNC##8 }, \
944 { .fni4 = gen_helper_neon_##FUNC##16 }, \
945 { .fni4 = gen_helper_neon_##FUNC##32 }, \
946 { 0 }, \
947 }; \
948 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
949 } \
950 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
951 { \
952 if (a->size > 2) { \
953 return false; \
954 } \
955 return do_3same(s, a, gen_##INSN##_3s); \
956 }
957
958 /*
959 * Some helper functions need to be passed the cpu_env. In order
960 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
961 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
962 * and which call a NeonGenTwoOpEnvFn().
963 */
964 #define WRAP_ENV_FN(WRAPNAME, FUNC) \
965 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
966 { \
967 FUNC(d, cpu_env, n, m); \
968 }
969
970 #define DO_3SAME_32_ENV(INSN, FUNC) \
971 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
972 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
973 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
974 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
975 uint32_t rn_ofs, uint32_t rm_ofs, \
976 uint32_t oprsz, uint32_t maxsz) \
977 { \
978 static const GVecGen3 ops[4] = { \
979 { .fni4 = gen_##INSN##_tramp8 }, \
980 { .fni4 = gen_##INSN##_tramp16 }, \
981 { .fni4 = gen_##INSN##_tramp32 }, \
982 { 0 }, \
983 }; \
984 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
985 } \
986 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
987 { \
988 if (a->size > 2) { \
989 return false; \
990 } \
991 return do_3same(s, a, gen_##INSN##_3s); \
992 }
993
994 DO_3SAME_32(VHADD_S, hadd_s)
995 DO_3SAME_32(VHADD_U, hadd_u)
996 DO_3SAME_32(VHSUB_S, hsub_s)
997 DO_3SAME_32(VHSUB_U, hsub_u)
998 DO_3SAME_32(VRHADD_S, rhadd_s)
999 DO_3SAME_32(VRHADD_U, rhadd_u)
1000 DO_3SAME_32(VRSHL_S, rshl_s)
1001 DO_3SAME_32(VRSHL_U, rshl_u)
1002
1003 DO_3SAME_32_ENV(VQSHL_S, qshl_s)
1004 DO_3SAME_32_ENV(VQSHL_U, qshl_u)
1005 DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
1006 DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1007
1008 static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
1009 {
1010 /* Operations handled pairwise 32 bits at a time */
1011 TCGv_i32 tmp, tmp2, tmp3;
1012
1013 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1014 return false;
1015 }
1016
1017 /* UNDEF accesses to D16-D31 if they don't exist. */
1018 if (!dc_isar_feature(aa32_simd_r32, s) &&
1019 ((a->vd | a->vn | a->vm) & 0x10)) {
1020 return false;
1021 }
1022
1023 if (a->size == 3) {
1024 return false;
1025 }
1026
1027 if (!vfp_access_check(s)) {
1028 return true;
1029 }
1030
1031 assert(a->q == 0); /* enforced by decode patterns */
1032
1033 /*
1034 * Note that we have to be careful not to clobber the source operands
1035 * in the "vm == vd" case by storing the result of the first pass too
1036 * early. Since Q is 0 there are always just two passes, so instead
1037 * of a complicated loop over each pass we just unroll.
1038 */
1039 tmp = tcg_temp_new_i32();
1040 tmp2 = tcg_temp_new_i32();
1041 tmp3 = tcg_temp_new_i32();
1042
1043 read_neon_element32(tmp, a->vn, 0, MO_32);
1044 read_neon_element32(tmp2, a->vn, 1, MO_32);
1045 fn(tmp, tmp, tmp2);
1046
1047 read_neon_element32(tmp3, a->vm, 0, MO_32);
1048 read_neon_element32(tmp2, a->vm, 1, MO_32);
1049 fn(tmp3, tmp3, tmp2);
1050
1051 write_neon_element32(tmp, a->vd, 0, MO_32);
1052 write_neon_element32(tmp3, a->vd, 1, MO_32);
1053
1054 return true;
1055 }
1056
1057 #define DO_3SAME_PAIR(INSN, func) \
1058 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1059 { \
1060 static NeonGenTwoOpFn * const fns[] = { \
1061 gen_helper_neon_##func##8, \
1062 gen_helper_neon_##func##16, \
1063 gen_helper_neon_##func##32, \
1064 }; \
1065 if (a->size > 2) { \
1066 return false; \
1067 } \
1068 return do_3same_pair(s, a, fns[a->size]); \
1069 }
1070
1071 /* 32-bit pairwise ops end up the same as the elementwise versions. */
1072 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
1073 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
1074 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
1075 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
1076 #define gen_helper_neon_padd_u32 tcg_gen_add_i32
1077
1078 DO_3SAME_PAIR(VPMAX_S, pmax_s)
1079 DO_3SAME_PAIR(VPMIN_S, pmin_s)
1080 DO_3SAME_PAIR(VPMAX_U, pmax_u)
1081 DO_3SAME_PAIR(VPMIN_U, pmin_u)
1082 DO_3SAME_PAIR(VPADD, padd_u)
1083
1084 #define DO_3SAME_VQDMULH(INSN, FUNC) \
1085 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
1086 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
1087 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
1088 uint32_t rn_ofs, uint32_t rm_ofs, \
1089 uint32_t oprsz, uint32_t maxsz) \
1090 { \
1091 static const GVecGen3 ops[2] = { \
1092 { .fni4 = gen_##INSN##_tramp16 }, \
1093 { .fni4 = gen_##INSN##_tramp32 }, \
1094 }; \
1095 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1096 } \
1097 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
1098 { \
1099 if (a->size != 1 && a->size != 2) { \
1100 return false; \
1101 } \
1102 return do_3same(s, a, gen_##INSN##_3s); \
1103 }
1104
1105 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1106 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1107
1108 #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
1109 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
1110 uint32_t rn_ofs, uint32_t rm_ofs, \
1111 uint32_t oprsz, uint32_t maxsz) \
1112 { \
1113 TCGv_ptr fpst = fpstatus_ptr(FPST); \
1114 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
1115 oprsz, maxsz, 0, FUNC); \
1116 }
1117
1118 #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
1119 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
1120 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
1121 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1122 { \
1123 if (a->size == MO_16) { \
1124 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1125 return false; \
1126 } \
1127 return do_3same(s, a, gen_##INSN##_fp16_3s); \
1128 } \
1129 return do_3same(s, a, gen_##INSN##_fp32_3s); \
1130 }
1131
1132
1133 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1134 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1135 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1136 DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1137 DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1138 DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1139 DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1140 DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1141 DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1142 DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1143 DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1144 DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1145 DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1146 DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1147 DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1148 DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1149 DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1150
1151 WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1152 WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1153 WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1154 WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1155
1156 static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1157 {
1158 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1159 return false;
1160 }
1161
1162 if (a->size == MO_16) {
1163 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1164 return false;
1165 }
1166 return do_3same(s, a, gen_VMAXNM_fp16_3s);
1167 }
1168 return do_3same(s, a, gen_VMAXNM_fp32_3s);
1169 }
1170
1171 static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1172 {
1173 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1174 return false;
1175 }
1176
1177 if (a->size == MO_16) {
1178 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1179 return false;
1180 }
1181 return do_3same(s, a, gen_VMINNM_fp16_3s);
1182 }
1183 return do_3same(s, a, gen_VMINNM_fp32_3s);
1184 }
1185
1186 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
1187 gen_helper_gvec_3_ptr *fn)
1188 {
1189 /* FP pairwise operations */
1190 TCGv_ptr fpstatus;
1191
1192 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1193 return false;
1194 }
1195
1196 /* UNDEF accesses to D16-D31 if they don't exist. */
1197 if (!dc_isar_feature(aa32_simd_r32, s) &&
1198 ((a->vd | a->vn | a->vm) & 0x10)) {
1199 return false;
1200 }
1201
1202 if (!vfp_access_check(s)) {
1203 return true;
1204 }
1205
1206 assert(a->q == 0); /* enforced by decode patterns */
1207
1208
1209 fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1210 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
1211 vfp_reg_offset(1, a->vn),
1212 vfp_reg_offset(1, a->vm),
1213 fpstatus, 8, 8, 0, fn);
1214
1215 return true;
1216 }
1217
1218 /*
1219 * For all the functions using this macro, size == 1 means fp16,
1220 * which is only valid when the fp16 arithmetic feature is present.
1221 */
1222 #define DO_3S_FP_PAIR(INSN,FUNC) \
1223 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1224 { \
1225 if (a->size == MO_16) { \
1226 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
1227 return false; \
1228 } \
1229 return do_3same_fp_pair(s, a, FUNC##h); \
1230 } \
1231 return do_3same_fp_pair(s, a, FUNC##s); \
1232 }
1233
1234 DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
1235 DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
1236 DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1237
1238 static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1239 {
1240 /* Handle a 2-reg-shift insn which can be vectorized. */
1241 int vec_size = a->q ? 16 : 8;
1242 int rd_ofs = neon_full_reg_offset(a->vd);
1243 int rm_ofs = neon_full_reg_offset(a->vm);
1244
1245 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1246 return false;
1247 }
1248
1249 /* UNDEF accesses to D16-D31 if they don't exist. */
1250 if (!dc_isar_feature(aa32_simd_r32, s) &&
1251 ((a->vd | a->vm) & 0x10)) {
1252 return false;
1253 }
1254
1255 if ((a->vm | a->vd) & a->q) {
1256 return false;
1257 }
1258
1259 if (!vfp_access_check(s)) {
1260 return true;
1261 }
1262
1263 fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1264 return true;
1265 }
1266
1267 #define DO_2SH(INSN, FUNC) \
1268 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1269 { \
1270 return do_vector_2sh(s, a, FUNC); \
1271 } \
1272
1273 DO_2SH(VSHL, tcg_gen_gvec_shli)
1274 DO_2SH(VSLI, gen_gvec_sli)
1275 DO_2SH(VSRI, gen_gvec_sri)
1276 DO_2SH(VSRA_S, gen_gvec_ssra)
1277 DO_2SH(VSRA_U, gen_gvec_usra)
1278 DO_2SH(VRSHR_S, gen_gvec_srshr)
1279 DO_2SH(VRSHR_U, gen_gvec_urshr)
1280 DO_2SH(VRSRA_S, gen_gvec_srsra)
1281 DO_2SH(VRSRA_U, gen_gvec_ursra)
1282
1283 static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1284 {
1285 /* Signed shift out of range results in all-sign-bits */
1286 a->shift = MIN(a->shift, (8 << a->size) - 1);
1287 return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1288 }
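/*
 * E.g. VSHR.S8 with the maximum shift of 8 is clamped to 7 here; an
 * arithmetic shift by 7 already fills each byte with its sign bit, so
 * the architectural result is unchanged.
 */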
1289
1290 static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1291 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1292 {
1293 tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1294 }
1295
1296 static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1297 {
1298 /* Shift out of range is architecturally valid and results in zero. */
1299 if (a->shift >= (8 << a->size)) {
1300 return do_vector_2sh(s, a, gen_zero_rd_2sh);
1301 } else {
1302 return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1303 }
1304 }
1305
1306 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1307 NeonGenTwo64OpEnvFn *fn)
1308 {
1309 /*
1310 * 2-reg-and-shift operations, size == 3 case, where the
1311 * function needs to be passed cpu_env.
1312 */
1313 TCGv_i64 constimm;
1314 int pass;
1315
1316 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1317 return false;
1318 }
1319
1320 /* UNDEF accesses to D16-D31 if they don't exist. */
1321 if (!dc_isar_feature(aa32_simd_r32, s) &&
1322 ((a->vd | a->vm) & 0x10)) {
1323 return false;
1324 }
1325
1326 if ((a->vm | a->vd) & a->q) {
1327 return false;
1328 }
1329
1330 if (!vfp_access_check(s)) {
1331 return true;
1332 }
1333
1334 /*
1335 * To avoid excessive duplication of ops we implement shift
1336 * by immediate using the variable shift operations.
1337 */
1338 constimm = tcg_constant_i64(dup_const(a->size, a->shift));
1339
1340 for (pass = 0; pass < a->q + 1; pass++) {
1341 TCGv_i64 tmp = tcg_temp_new_i64();
1342
1343 read_neon_element64(tmp, a->vm, pass, MO_64);
1344 fn(tmp, cpu_env, tmp, constimm);
1345 write_neon_element64(tmp, a->vd, pass, MO_64);
1346 }
1347 return true;
1348 }
1349
1350 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1351 NeonGenTwoOpEnvFn *fn)
1352 {
1353 /*
1354 * 2-reg-and-shift operations, size < 3 case, where the
1355 * helper needs to be passed cpu_env.
1356 */
1357 TCGv_i32 constimm, tmp;
1358 int pass;
1359
1360 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1361 return false;
1362 }
1363
1364 /* UNDEF accesses to D16-D31 if they don't exist. */
1365 if (!dc_isar_feature(aa32_simd_r32, s) &&
1366 ((a->vd | a->vm) & 0x10)) {
1367 return false;
1368 }
1369
1370 if ((a->vm | a->vd) & a->q) {
1371 return false;
1372 }
1373
1374 if (!vfp_access_check(s)) {
1375 return true;
1376 }
1377
1378 /*
1379 * To avoid excessive duplication of ops we implement shift
1380 * by immediate using the variable shift operations.
1381 */
1382 constimm = tcg_constant_i32(dup_const(a->size, a->shift));
1383 tmp = tcg_temp_new_i32();
1384
1385 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1386 read_neon_element32(tmp, a->vm, pass, MO_32);
1387 fn(tmp, cpu_env, tmp, constimm);
1388 write_neon_element32(tmp, a->vd, pass, MO_32);
1389 }
1390 return true;
1391 }
1392
1393 #define DO_2SHIFT_ENV(INSN, FUNC) \
1394 static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1395 { \
1396 return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
1397 } \
1398 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1399 { \
1400 static NeonGenTwoOpEnvFn * const fns[] = { \
1401 gen_helper_neon_##FUNC##8, \
1402 gen_helper_neon_##FUNC##16, \
1403 gen_helper_neon_##FUNC##32, \
1404 }; \
1405 assert(a->size < ARRAY_SIZE(fns)); \
1406 return do_2shift_env_32(s, a, fns[a->size]); \
1407 }
1408
1409 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1410 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1411 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1412
1413 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1414 NeonGenTwo64OpFn *shiftfn,
1415 NeonGenNarrowEnvFn *narrowfn)
1416 {
1417 /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1418 TCGv_i64 constimm, rm1, rm2;
1419 TCGv_i32 rd;
1420
1421 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1422 return false;
1423 }
1424
1425 /* UNDEF accesses to D16-D31 if they don't exist. */
1426 if (!dc_isar_feature(aa32_simd_r32, s) &&
1427 ((a->vd | a->vm) & 0x10)) {
1428 return false;
1429 }
1430
1431 if (a->vm & 1) {
1432 return false;
1433 }
1434
1435 if (!vfp_access_check(s)) {
1436 return true;
1437 }
1438
1439 /*
1440 * This is always a right shift, and the shiftfn is always a
1441 * left-shift helper, which thus needs the negated shift count.
1442 */
1443 constimm = tcg_constant_i64(-a->shift);
1444 rm1 = tcg_temp_new_i64();
1445 rm2 = tcg_temp_new_i64();
1446 rd = tcg_temp_new_i32();
1447
1448 /* Load both inputs first to avoid potential overwrite if rm == rd */
1449 read_neon_element64(rm1, a->vm, 0, MO_64);
1450 read_neon_element64(rm2, a->vm, 1, MO_64);
1451
1452 shiftfn(rm1, rm1, constimm);
1453 narrowfn(rd, cpu_env, rm1);
1454 write_neon_element32(rd, a->vd, 0, MO_32);
1455
1456 shiftfn(rm2, rm2, constimm);
1457 narrowfn(rd, cpu_env, rm2);
1458 write_neon_element32(rd, a->vd, 1, MO_32);
1459
1460 return true;
1461 }
1462
1463 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1464 NeonGenTwoOpFn *shiftfn,
1465 NeonGenNarrowEnvFn *narrowfn)
1466 {
1467 /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1468 TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1469 TCGv_i64 rtmp;
1470 uint32_t imm;
1471
1472 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1473 return false;
1474 }
1475
1476 /* UNDEF accesses to D16-D31 if they don't exist. */
1477 if (!dc_isar_feature(aa32_simd_r32, s) &&
1478 ((a->vd | a->vm) & 0x10)) {
1479 return false;
1480 }
1481
1482 if (a->vm & 1) {
1483 return false;
1484 }
1485
1486 if (!vfp_access_check(s)) {
1487 return true;
1488 }
1489
1490 /*
1491 * This is always a right shift, and the shiftfn is always a
1492 * left-shift helper, which thus needs the negated shift count
1493 * duplicated into each lane of the immediate value.
1494 */
1495 if (a->size == 1) {
1496 imm = (uint16_t)(-a->shift);
1497 imm |= imm << 16;
1498 } else {
1499 /* size == 2 */
1500 imm = -a->shift;
1501 }
1502 constimm = tcg_constant_i32(imm);
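/*
 * Example: for 16-bit elements (a->size == 1) and a shift of 5, imm is
 * (uint16_t)-5 = 0xfffb replicated to 0xfffbfffb, so the variable-shift
 * helper sees a shift count of -5 in each 16-bit lane, i.e. a right
 * shift by 5.
 */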
1503
1504 /* Load all inputs first to avoid potential overwrite */
1505 rm1 = tcg_temp_new_i32();
1506 rm2 = tcg_temp_new_i32();
1507 rm3 = tcg_temp_new_i32();
1508 rm4 = tcg_temp_new_i32();
1509 read_neon_element32(rm1, a->vm, 0, MO_32);
1510 read_neon_element32(rm2, a->vm, 1, MO_32);
1511 read_neon_element32(rm3, a->vm, 2, MO_32);
1512 read_neon_element32(rm4, a->vm, 3, MO_32);
1513 rtmp = tcg_temp_new_i64();
1514
1515 shiftfn(rm1, rm1, constimm);
1516 shiftfn(rm2, rm2, constimm);
1517
1518 tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1519
1520 narrowfn(rm1, cpu_env, rtmp);
1521 write_neon_element32(rm1, a->vd, 0, MO_32);
1522
1523 shiftfn(rm3, rm3, constimm);
1524 shiftfn(rm4, rm4, constimm);
1525
1526 tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1527
1528 narrowfn(rm3, cpu_env, rtmp);
1529 write_neon_element32(rm3, a->vd, 1, MO_32);
1530 return true;
1531 }
1532
1533 #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
1534 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1535 { \
1536 return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
1537 }
1538 #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
1539 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1540 { \
1541 return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
1542 }
1543
1544 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1545 {
1546 tcg_gen_extrl_i64_i32(dest, src);
1547 }
1548
1549 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1550 {
1551 gen_helper_neon_narrow_u16(dest, src);
1552 }
1553
1554 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1555 {
1556 gen_helper_neon_narrow_u8(dest, src);
1557 }
1558
1559 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1560 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1561 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1562
1563 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1564 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1565 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1566
1567 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1568 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1569 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1570
1571 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1572 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1573 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1574 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1575 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1576 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1577
1578 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1579 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1580 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1581
1582 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1583 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1584 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1585
1586 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1587 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1588 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1589
1590 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1591 NeonGenWidenFn *widenfn, bool u)
1592 {
1593 TCGv_i64 tmp;
1594 TCGv_i32 rm0, rm1;
1595 uint64_t widen_mask = 0;
1596
1597 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1598 return false;
1599 }
1600
1601 /* UNDEF accesses to D16-D31 if they don't exist. */
1602 if (!dc_isar_feature(aa32_simd_r32, s) &&
1603 ((a->vd | a->vm) & 0x10)) {
1604 return false;
1605 }
1606
1607 if (a->vd & 1) {
1608 return false;
1609 }
1610
1611 if (!vfp_access_check(s)) {
1612 return true;
1613 }
1614
1615 /*
1616 * This is a widen-and-shift operation. The shift is always less
1617 * than the width of the source type, so after widening the input
1618 * vector we can simply shift the whole 64-bit widened register,
1619 * and then clear the potential overflow bits resulting from left
1620 * bits of the narrow input appearing as right bits of the left
1621 * neighbour narrow input. Calculate a mask of bits to clear.
1622 */
1623 if ((a->shift != 0) && (a->size < 2 || u)) {
1624 int esize = 8 << a->size;
1625 widen_mask = MAKE_64BIT_MASK(0, esize);
1626 widen_mask >>= esize - a->shift;
1627 widen_mask = dup_const(a->size + 1, widen_mask);
1628 }
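/*
 * Worked example: for VSHLL.S8 by 3 (a->size == 0, a->shift == 3),
 * esize is 8, so widen_mask is 0xff >> 5 == 0x07, replicated across
 * the 16-bit lanes to 0x0007000700070007.  A negative byte in one lane
 * sign-extends into its top bits, and after the 64-bit left shift those
 * bits land in the low 3 bits of the lane above; the mask clears them.
 */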
1629
1630 rm0 = tcg_temp_new_i32();
1631 rm1 = tcg_temp_new_i32();
1632 read_neon_element32(rm0, a->vm, 0, MO_32);
1633 read_neon_element32(rm1, a->vm, 1, MO_32);
1634 tmp = tcg_temp_new_i64();
1635
1636 widenfn(tmp, rm0);
1637 if (a->shift != 0) {
1638 tcg_gen_shli_i64(tmp, tmp, a->shift);
1639 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1640 }
1641 write_neon_element64(tmp, a->vd, 0, MO_64);
1642
1643 widenfn(tmp, rm1);
1644 if (a->shift != 0) {
1645 tcg_gen_shli_i64(tmp, tmp, a->shift);
1646 tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1647 }
1648 write_neon_element64(tmp, a->vd, 1, MO_64);
1649 return true;
1650 }
1651
1652 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1653 {
1654 static NeonGenWidenFn * const widenfn[] = {
1655 gen_helper_neon_widen_s8,
1656 gen_helper_neon_widen_s16,
1657 tcg_gen_ext_i32_i64,
1658 };
1659 return do_vshll_2sh(s, a, widenfn[a->size], false);
1660 }
1661
1662 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1663 {
1664 static NeonGenWidenFn * const widenfn[] = {
1665 gen_helper_neon_widen_u8,
1666 gen_helper_neon_widen_u16,
1667 tcg_gen_extu_i32_i64,
1668 };
1669 return do_vshll_2sh(s, a, widenfn[a->size], true);
1670 }
1671
1672 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1673 gen_helper_gvec_2_ptr *fn)
1674 {
1675 /* FP operations in 2-reg-and-shift group */
1676 int vec_size = a->q ? 16 : 8;
1677 int rd_ofs = neon_full_reg_offset(a->vd);
1678 int rm_ofs = neon_full_reg_offset(a->vm);
1679 TCGv_ptr fpst;
1680
1681 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1682 return false;
1683 }
1684
1685 if (a->size == MO_16) {
1686 if (!dc_isar_feature(aa32_fp16_arith, s)) {
1687 return false;
1688 }
1689 }
1690
1691 /* UNDEF accesses to D16-D31 if they don't exist. */
1692 if (!dc_isar_feature(aa32_simd_r32, s) &&
1693 ((a->vd | a->vm) & 0x10)) {
1694 return false;
1695 }
1696
1697 if ((a->vm | a->vd) & a->q) {
1698 return false;
1699 }
1700
1701 if (!vfp_access_check(s)) {
1702 return true;
1703 }
1704
1705 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1706 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1707 return true;
1708 }
1709
1710 #define DO_FP_2SH(INSN, FUNC) \
1711 static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
1712 { \
1713 return do_fp_2sh(s, a, FUNC); \
1714 }
1715
1716 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1717 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1718 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1719 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1720
1721 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1722 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1723 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1724 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1725
1726 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1727 GVecGen2iFn *fn)
1728 {
1729 uint64_t imm;
1730 int reg_ofs, vec_size;
1731
1732 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1733 return false;
1734 }
1735
1736 /* UNDEF accesses to D16-D31 if they don't exist. */
1737 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1738 return false;
1739 }
1740
1741 if (a->vd & a->q) {
1742 return false;
1743 }
1744
1745 if (!vfp_access_check(s)) {
1746 return true;
1747 }
1748
1749 reg_ofs = neon_full_reg_offset(a->vd);
1750 vec_size = a->q ? 16 : 8;
1751 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1752
1753 fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1754 return true;
1755 }
1756
1757 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1758 int64_t c, uint32_t oprsz, uint32_t maxsz)
1759 {
1760 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1761 }
1762
1763 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1764 {
1765 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1766 GVecGen2iFn *fn;
1767
1768 if ((a->cmode & 1) && a->cmode < 12) {
1769 /* for op=1, the imm will be inverted, so BIC becomes AND. */
1770 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1771 } else {
1772 /* There is one unallocated cmode/op combination in this space */
1773 if (a->cmode == 15 && a->op == 1) {
1774 return false;
1775 }
1776 fn = gen_VMOV_1r;
1777 }
1778 return do_1reg_imm(s, a, fn);
1779 }
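/*
 * Example: VBIC.I32 #0xff00 arrives here with op == 1 and an odd cmode
 * below 12; the expanded immediate is already inverted, so the insn is
 * implemented as an AND with ~0x0000ff00 in each 32-bit lane.
 */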
1780
1781 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1782 NeonGenWidenFn *widenfn,
1783 NeonGenTwo64OpFn *opfn,
1784 int src1_mop, int src2_mop)
1785 {
1786 /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
1787 TCGv_i64 rn0_64, rn1_64, rm_64;
1788
1789 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1790 return false;
1791 }
1792
1793 /* UNDEF accesses to D16-D31 if they don't exist. */
1794 if (!dc_isar_feature(aa32_simd_r32, s) &&
1795 ((a->vd | a->vn | a->vm) & 0x10)) {
1796 return false;
1797 }
1798
1799 if (!opfn) {
1800 /* size == 3 case, which is an entirely different insn group */
1801 return false;
1802 }
1803
1804 if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
1805 return false;
1806 }
1807
1808 if (!vfp_access_check(s)) {
1809 return true;
1810 }
1811
1812 rn0_64 = tcg_temp_new_i64();
1813 rn1_64 = tcg_temp_new_i64();
1814 rm_64 = tcg_temp_new_i64();
1815
1816 if (src1_mop >= 0) {
1817 read_neon_element64(rn0_64, a->vn, 0, src1_mop);
1818 } else {
1819 TCGv_i32 tmp = tcg_temp_new_i32();
1820 read_neon_element32(tmp, a->vn, 0, MO_32);
1821 widenfn(rn0_64, tmp);
1822 }
1823 if (src2_mop >= 0) {
1824 read_neon_element64(rm_64, a->vm, 0, src2_mop);
1825 } else {
1826 TCGv_i32 tmp = tcg_temp_new_i32();
1827 read_neon_element32(tmp, a->vm, 0, MO_32);
1828 widenfn(rm_64, tmp);
1829 }
1830
1831 opfn(rn0_64, rn0_64, rm_64);
1832
1833 /*
1834 * Load second pass inputs before storing the first pass result, to
1835 * avoid incorrect results if a narrow input overlaps with the result.
1836 */
1837 if (src1_mop >= 0) {
1838 read_neon_element64(rn1_64, a->vn, 1, src1_mop);
1839 } else {
1840 TCGv_i32 tmp = tcg_temp_new_i32();
1841 read_neon_element32(tmp, a->vn, 1, MO_32);
1842 widenfn(rn1_64, tmp);
1843 }
1844 if (src2_mop >= 0) {
1845 read_neon_element64(rm_64, a->vm, 1, src2_mop);
1846 } else {
1847 TCGv_i32 tmp = tcg_temp_new_i32();
1848 read_neon_element32(tmp, a->vm, 1, MO_32);
1849 widenfn(rm_64, tmp);
1850 }
1851
1852 write_neon_element64(rn0_64, a->vd, 0, MO_64);
1853
1854 opfn(rn1_64, rn1_64, rm_64);
1855 write_neon_element64(rn1_64, a->vd, 1, MO_64);
1856
1857 return true;
1858 }
1859
1860 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
1861 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1862 { \
1863 static NeonGenWidenFn * const widenfn[] = { \
1864 gen_helper_neon_widen_##S##8, \
1865 gen_helper_neon_widen_##S##16, \
1866 NULL, NULL, \
1867 }; \
1868 static NeonGenTwo64OpFn * const addfn[] = { \
1869 gen_helper_neon_##OP##l_u16, \
1870 gen_helper_neon_##OP##l_u32, \
1871 tcg_gen_##OP##_i64, \
1872 NULL, \
1873 }; \
1874 int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
1875 return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
1876 SRC1WIDE ? MO_UQ : narrow_mop, \
1877 narrow_mop); \
1878 }
1879
1880 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
1881 DO_PREWIDEN(VADDL_U, u, add, false, 0)
1882 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
1883 DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
1884 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
1885 DO_PREWIDEN(VADDW_U, u, add, true, 0)
1886 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
1887 DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1888
1889 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1890 NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1891 {
1892 /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1893 TCGv_i64 rn_64, rm_64;
1894 TCGv_i32 rd0, rd1;
1895
1896 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1897 return false;
1898 }
1899
1900 /* UNDEF accesses to D16-D31 if they don't exist. */
1901 if (!dc_isar_feature(aa32_simd_r32, s) &&
1902 ((a->vd | a->vn | a->vm) & 0x10)) {
1903 return false;
1904 }
1905
1906 if (!opfn || !narrowfn) {
1907 /* size == 3 case, which is an entirely different insn group */
1908 return false;
1909 }
1910
1911 if ((a->vn | a->vm) & 1) {
1912 return false;
1913 }
1914
1915 if (!vfp_access_check(s)) {
1916 return true;
1917 }
1918
1919 rn_64 = tcg_temp_new_i64();
1920 rm_64 = tcg_temp_new_i64();
1921 rd0 = tcg_temp_new_i32();
1922 rd1 = tcg_temp_new_i32();
1923
1924 read_neon_element64(rn_64, a->vn, 0, MO_64);
1925 read_neon_element64(rm_64, a->vm, 0, MO_64);
1926
1927 opfn(rn_64, rn_64, rm_64);
1928
1929 narrowfn(rd0, rn_64);
1930
1931 read_neon_element64(rn_64, a->vn, 1, MO_64);
1932 read_neon_element64(rm_64, a->vm, 1, MO_64);
1933
1934 opfn(rn_64, rn_64, rm_64);
1935
1936 narrowfn(rd1, rn_64);
1937
1938 write_neon_element32(rd0, a->vd, 0, MO_32);
1939 write_neon_element32(rd1, a->vd, 1, MO_32);
1940
1941 return true;
1942 }
1943
1944 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
1945 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
1946 { \
1947 static NeonGenTwo64OpFn * const addfn[] = { \
1948 gen_helper_neon_##OP##l_u16, \
1949 gen_helper_neon_##OP##l_u32, \
1950 tcg_gen_##OP##_i64, \
1951 NULL, \
1952 }; \
1953 static NeonGenNarrowFn * const narrowfn[] = { \
1954 gen_helper_neon_##NARROWTYPE##_high_u8, \
1955 gen_helper_neon_##NARROWTYPE##_high_u16, \
1956 EXTOP, \
1957 NULL, \
1958 }; \
1959 return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
1960 }
1961
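/*
 * Rounding variant of "take the high half": adding 1 << 31 before
 * extracting bits [63:32] rounds to nearest (ties rounded up), as
 * VRADDHN/VRSUBHN require.
 */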
1962 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1963 {
1964 tcg_gen_addi_i64(rn, rn, 1u << 31);
1965 tcg_gen_extrh_i64_i32(rd, rn);
1966 }
1967
1968 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1969 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1970 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1971 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1972
1973 static bool do_long_3d(DisasContext *s, arg_3diff *a,
1974 NeonGenTwoOpWidenFn *opfn,
1975 NeonGenTwo64OpFn *accfn)
1976 {
1977 /*
1978 * 3-regs different lengths, long operations.
1979 * These perform an operation on two inputs that returns a double-width
1980 * result, and then possibly perform an accumulation operation of
1981 * that result into the double-width destination.
1982 */
1983 TCGv_i64 rd0, rd1, tmp;
1984 TCGv_i32 rn, rm;
1985
1986 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1987 return false;
1988 }
1989
1990 /* UNDEF accesses to D16-D31 if they don't exist. */
1991 if (!dc_isar_feature(aa32_simd_r32, s) &&
1992 ((a->vd | a->vn | a->vm) & 0x10)) {
1993 return false;
1994 }
1995
1996 if (!opfn) {
1997 /* size == 3 case, which is an entirely different insn group */
1998 return false;
1999 }
2000
2001 if (a->vd & 1) {
2002 return false;
2003 }
2004
2005 if (!vfp_access_check(s)) {
2006 return true;
2007 }
2008
2009 rd0 = tcg_temp_new_i64();
2010 rd1 = tcg_temp_new_i64();
2011
2012 rn = tcg_temp_new_i32();
2013 rm = tcg_temp_new_i32();
2014 read_neon_element32(rn, a->vn, 0, MO_32);
2015 read_neon_element32(rm, a->vm, 0, MO_32);
2016 opfn(rd0, rn, rm);
2017
2018 read_neon_element32(rn, a->vn, 1, MO_32);
2019 read_neon_element32(rm, a->vm, 1, MO_32);
2020 opfn(rd1, rn, rm);
2021
2022 /* Don't store results until after all loads: they might overlap */
2023 if (accfn) {
2024 tmp = tcg_temp_new_i64();
2025 read_neon_element64(tmp, a->vd, 0, MO_64);
2026 accfn(rd0, tmp, rd0);
2027 read_neon_element64(tmp, a->vd, 1, MO_64);
2028 accfn(rd1, tmp, rd1);
2029 }
2030
2031 write_neon_element64(rd0, a->vd, 0, MO_64);
2032 write_neon_element64(rd1, a->vd, 1, MO_64);
2033
2034 return true;
2035 }
2036
2037 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
2038 {
2039 static NeonGenTwoOpWidenFn * const opfn[] = {
2040 gen_helper_neon_abdl_s16,
2041 gen_helper_neon_abdl_s32,
2042 gen_helper_neon_abdl_s64,
2043 NULL,
2044 };
2045
2046 return do_long_3d(s, a, opfn[a->size], NULL);
2047 }
2048
2049 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2050 {
2051 static NeonGenTwoOpWidenFn * const opfn[] = {
2052 gen_helper_neon_abdl_u16,
2053 gen_helper_neon_abdl_u32,
2054 gen_helper_neon_abdl_u64,
2055 NULL,
2056 };
2057
2058 return do_long_3d(s, a, opfn[a->size], NULL);
2059 }
2060
2061 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2062 {
2063 static NeonGenTwoOpWidenFn * const opfn[] = {
2064 gen_helper_neon_abdl_s16,
2065 gen_helper_neon_abdl_s32,
2066 gen_helper_neon_abdl_s64,
2067 NULL,
2068 };
2069 static NeonGenTwo64OpFn * const addfn[] = {
2070 gen_helper_neon_addl_u16,
2071 gen_helper_neon_addl_u32,
2072 tcg_gen_add_i64,
2073 NULL,
2074 };
2075
2076 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2077 }
2078
2079 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2080 {
2081 static NeonGenTwoOpWidenFn * const opfn[] = {
2082 gen_helper_neon_abdl_u16,
2083 gen_helper_neon_abdl_u32,
2084 gen_helper_neon_abdl_u64,
2085 NULL,
2086 };
2087 static NeonGenTwo64OpFn * const addfn[] = {
2088 gen_helper_neon_addl_u16,
2089 gen_helper_neon_addl_u32,
2090 tcg_gen_add_i64,
2091 NULL,
2092 };
2093
2094 return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2095 }
2096
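/*
 * The 32x32->64 widening multiplies are done inline rather than via a
 * Neon helper: the TCG double-width multiply produces the low and high
 * 32 bits separately, and the two halves are then glued back together
 * into a single i64 result.
 */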
2097 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2098 {
2099 TCGv_i32 lo = tcg_temp_new_i32();
2100 TCGv_i32 hi = tcg_temp_new_i32();
2101
2102 tcg_gen_muls2_i32(lo, hi, rn, rm);
2103 tcg_gen_concat_i32_i64(rd, lo, hi);
2104 }
2105
2106 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2107 {
2108 TCGv_i32 lo = tcg_temp_new_i32();
2109 TCGv_i32 hi = tcg_temp_new_i32();
2110
2111 tcg_gen_mulu2_i32(lo, hi, rn, rm);
2112 tcg_gen_concat_i32_i64(rd, lo, hi);
2113 }
2114
2115 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2116 {
2117 static NeonGenTwoOpWidenFn * const opfn[] = {
2118 gen_helper_neon_mull_s8,
2119 gen_helper_neon_mull_s16,
2120 gen_mull_s32,
2121 NULL,
2122 };
2123
2124 return do_long_3d(s, a, opfn[a->size], NULL);
2125 }
2126
2127 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2128 {
2129 static NeonGenTwoOpWidenFn * const opfn[] = {
2130 gen_helper_neon_mull_u8,
2131 gen_helper_neon_mull_u16,
2132 gen_mull_u32,
2133 NULL,
2134 };
2135
2136 return do_long_3d(s, a, opfn[a->size], NULL);
2137 }
2138
2139 #define DO_VMLAL(INSN,MULL,ACC) \
2140 static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
2141 { \
2142 static NeonGenTwoOpWidenFn * const opfn[] = { \
2143 gen_helper_neon_##MULL##8, \
2144 gen_helper_neon_##MULL##16, \
2145 gen_##MULL##32, \
2146 NULL, \
2147 }; \
2148 static NeonGenTwo64OpFn * const accfn[] = { \
2149 gen_helper_neon_##ACC##l_u16, \
2150 gen_helper_neon_##ACC##l_u32, \
2151 tcg_gen_##ACC##_i64, \
2152 NULL, \
2153 }; \
2154 return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
2155 }
2156
2157 DO_VMLAL(VMLAL_S,mull_s,add)
2158 DO_VMLAL(VMLAL_U,mull_u,add)
2159 DO_VMLAL(VMLSL_S,mull_s,sub)
2160 DO_VMLAL(VMLSL_U,mull_u,sub)
2161
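/*
 * VQDMULL is the widening multiply with saturating doubling: form the
 * double-width product, then use the saturating add helper to add it
 * to itself, which both doubles it and sets QC on overflow
 * (e.g. 0x8000 * 0x8000 doubled saturates to 0x7fffffff).
 */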
2162 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2163 {
2164 gen_helper_neon_mull_s16(rd, rn, rm);
2165 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
2166 }
2167
2168 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2169 {
2170 gen_mull_s32(rd, rn, rm);
2171 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
2172 }
2173
2174 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2175 {
2176 static NeonGenTwoOpWidenFn * const opfn[] = {
2177 NULL,
2178 gen_VQDMULL_16,
2179 gen_VQDMULL_32,
2180 NULL,
2181 };
2182
2183 return do_long_3d(s, a, opfn[a->size], NULL);
2184 }
2185
2186 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2187 {
2188 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2189 }
2190
2191 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2192 {
2193 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2194 }
2195
2196 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2197 {
2198 static NeonGenTwoOpWidenFn * const opfn[] = {
2199 NULL,
2200 gen_VQDMULL_16,
2201 gen_VQDMULL_32,
2202 NULL,
2203 };
2204 static NeonGenTwo64OpFn * const accfn[] = {
2205 NULL,
2206 gen_VQDMLAL_acc_16,
2207 gen_VQDMLAL_acc_32,
2208 NULL,
2209 };
2210
2211 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2212 }
2213
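/*
 * VQDMLSL has no dedicated helper: the doubled products from VQDMULL
 * are negated per lane and then fed through the same saturating
 * accumulate step as VQDMLAL.
 */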
2214 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2215 {
2216 gen_helper_neon_negl_u32(rm, rm);
2217 gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2218 }
2219
2220 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2221 {
2222 tcg_gen_neg_i64(rm, rm);
2223 gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2224 }
2225
2226 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2227 {
2228 static NeonGenTwoOpWidenFn * const opfn[] = {
2229 NULL,
2230 gen_VQDMULL_16,
2231 gen_VQDMULL_32,
2232 NULL,
2233 };
2234 static NeonGenTwo64OpFn * const accfn[] = {
2235 NULL,
2236 gen_VQDMLSL_acc_16,
2237 gen_VQDMLSL_acc_32,
2238 NULL,
2239 };
2240
2241 return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2242 }
2243
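/*
 * VMULL.P8 / VMULL.P64: polynomial (carry-less) multiply.  The 8-bit
 * form is always present in Neon; the 64-bit form is the crypto PMULL
 * and is gated on the aa32_pmull feature.  size == 1 and size == 3 are
 * reserved encodings.
 */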
2244 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2245 {
2246 gen_helper_gvec_3 *fn_gvec;
2247
2248 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2249 return false;
2250 }
2251
2252 /* UNDEF accesses to D16-D31 if they don't exist. */
2253 if (!dc_isar_feature(aa32_simd_r32, s) &&
2254 ((a->vd | a->vn | a->vm) & 0x10)) {
2255 return false;
2256 }
2257
2258 if (a->vd & 1) {
2259 return false;
2260 }
2261
2262 switch (a->size) {
2263 case 0:
2264 fn_gvec = gen_helper_neon_pmull_h;
2265 break;
2266 case 2:
2267 if (!dc_isar_feature(aa32_pmull, s)) {
2268 return false;
2269 }
2270 fn_gvec = gen_helper_gvec_pmull_q;
2271 break;
2272 default:
2273 return false;
2274 }
2275
2276 if (!vfp_access_check(s)) {
2277 return true;
2278 }
2279
2280 tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
2281 neon_full_reg_offset(a->vn),
2282 neon_full_reg_offset(a->vm),
2283 16, 16, 0, fn_gvec);
2284 return true;
2285 }
2286
2287 static void gen_neon_dup_low16(TCGv_i32 var)
2288 {
2289 TCGv_i32 tmp = tcg_temp_new_i32();
2290 tcg_gen_ext16u_i32(var, var);
2291 tcg_gen_shli_i32(tmp, var, 16);
2292 tcg_gen_or_i32(var, var, tmp);
2293 }
2294
2295 static void gen_neon_dup_high16(TCGv_i32 var)
2296 {
2297 TCGv_i32 tmp = tcg_temp_new_i32();
2298 tcg_gen_andi_i32(var, var, 0xffff0000);
2299 tcg_gen_shri_i32(tmp, var, 16);
2300 tcg_gen_or_i32(var, var, tmp);
2301 }
2302
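/*
 * For the two-reg-and-scalar insns the scalar operand is encoded in
 * M:Vm: the low bits select the D register and the high bits the
 * element index.  For 16-bit scalars the containing 32-bit element is
 * loaded and the wanted half duplicated into both halves, so that the
 * packed two-lane 16-bit helpers see the scalar in every lane.
 */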
2303 static inline TCGv_i32 neon_get_scalar(int size, int reg)
2304 {
2305 TCGv_i32 tmp = tcg_temp_new_i32();
2306 if (size == MO_16) {
2307 read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
2308 if (reg & 8) {
2309 gen_neon_dup_high16(tmp);
2310 } else {
2311 gen_neon_dup_low16(tmp);
2312 }
2313 } else {
2314 read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
2315 }
2316 return tmp;
2317 }
2318
2319 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2320 NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2321 {
2322 /*
2323 * Two registers and a scalar: perform an operation between
2324 * the input elements and the scalar, and then possibly
2325 * perform an accumulation operation of that result into the
2326 * destination.
2327 */
2328 TCGv_i32 scalar, tmp;
2329 int pass;
2330
2331 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2332 return false;
2333 }
2334
2335 /* UNDEF accesses to D16-D31 if they don't exist. */
2336 if (!dc_isar_feature(aa32_simd_r32, s) &&
2337 ((a->vd | a->vn | a->vm) & 0x10)) {
2338 return false;
2339 }
2340
2341 if (!opfn) {
2342 /* Bad size (including size == 3, which is a different insn group) */
2343 return false;
2344 }
2345
2346 if (a->q && ((a->vd | a->vn) & 1)) {
2347 return false;
2348 }
2349
2350 if (!vfp_access_check(s)) {
2351 return true;
2352 }
2353
2354 scalar = neon_get_scalar(a->size, a->vm);
2355 tmp = tcg_temp_new_i32();
2356
2357 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2358 read_neon_element32(tmp, a->vn, pass, MO_32);
2359 opfn(tmp, tmp, scalar);
2360 if (accfn) {
2361 TCGv_i32 rd = tcg_temp_new_i32();
2362 read_neon_element32(rd, a->vd, pass, MO_32);
2363 accfn(tmp, rd, tmp);
2364 }
2365 write_neon_element32(tmp, a->vd, pass, MO_32);
2366 }
2367 return true;
2368 }
2369
2370 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2371 {
2372 static NeonGenTwoOpFn * const opfn[] = {
2373 NULL,
2374 gen_helper_neon_mul_u16,
2375 tcg_gen_mul_i32,
2376 NULL,
2377 };
2378
2379 return do_2scalar(s, a, opfn[a->size], NULL);
2380 }
2381
2382 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2383 {
2384 static NeonGenTwoOpFn * const opfn[] = {
2385 NULL,
2386 gen_helper_neon_mul_u16,
2387 tcg_gen_mul_i32,
2388 NULL,
2389 };
2390 static NeonGenTwoOpFn * const accfn[] = {
2391 NULL,
2392 gen_helper_neon_add_u16,
2393 tcg_gen_add_i32,
2394 NULL,
2395 };
2396
2397 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2398 }
2399
2400 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2401 {
2402 static NeonGenTwoOpFn * const opfn[] = {
2403 NULL,
2404 gen_helper_neon_mul_u16,
2405 tcg_gen_mul_i32,
2406 NULL,
2407 };
2408 static NeonGenTwoOpFn * const accfn[] = {
2409 NULL,
2410 gen_helper_neon_sub_u16,
2411 tcg_gen_sub_i32,
2412 NULL,
2413 };
2414
2415 return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2416 }
2417
2418 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2419 gen_helper_gvec_3_ptr *fn)
2420 {
2421 /* Two registers and a scalar, using gvec */
2422 int vec_size = a->q ? 16 : 8;
2423 int rd_ofs = neon_full_reg_offset(a->vd);
2424 int rn_ofs = neon_full_reg_offset(a->vn);
2425 int rm_ofs;
2426 int idx;
2427 TCGv_ptr fpstatus;
2428
2429 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2430 return false;
2431 }
2432
2433 /* UNDEF accesses to D16-D31 if they don't exist. */
2434 if (!dc_isar_feature(aa32_simd_r32, s) &&
2435 ((a->vd | a->vn | a->vm) & 0x10)) {
2436 return false;
2437 }
2438
2439 if (!fn) {
2440 /* Bad size (including size == 3, which is a different insn group) */
2441 return false;
2442 }
2443
2444 if (a->q && ((a->vd | a->vn) & 1)) {
2445 return false;
2446 }
2447
2448 if (!vfp_access_check(s)) {
2449 return true;
2450 }
2451
2452 /* a->vm is M:Vm, which encodes both register and index */
2453 idx = extract32(a->vm, a->size + 2, 2);
2454 a->vm = extract32(a->vm, 0, a->size + 2);
2455 rm_ofs = neon_full_reg_offset(a->vm);
2456
2457 fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
2458 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2459 vec_size, vec_size, idx, fn);
2460 return true;
2461 }
2462
2463 #define DO_VMUL_F_2sc(NAME, FUNC) \
2464 static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
2465 { \
2466 static gen_helper_gvec_3_ptr * const opfn[] = { \
2467 NULL, \
2468 gen_helper_##FUNC##_h, \
2469 gen_helper_##FUNC##_s, \
2470 NULL, \
2471 }; \
2472 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2473 return false; \
2474 } \
2475 return do_2scalar_fp_vec(s, a, opfn[a->size]); \
2476 }
2477
2478 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2479 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2480 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2481
2482 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
2483 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
2484 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
2485 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2486
2487 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2488 {
2489 static NeonGenTwoOpFn * const opfn[] = {
2490 NULL,
2491 gen_VQDMULH_16,
2492 gen_VQDMULH_32,
2493 NULL,
2494 };
2495
2496 return do_2scalar(s, a, opfn[a->size], NULL);
2497 }
2498
2499 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2500 {
2501 static NeonGenTwoOpFn * const opfn[] = {
2502 NULL,
2503 gen_VQRDMULH_16,
2504 gen_VQRDMULH_32,
2505 NULL,
2506 };
2507
2508 return do_2scalar(s, a, opfn[a->size], NULL);
2509 }
2510
2511 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
2512 NeonGenThreeOpEnvFn *opfn)
2513 {
2514 /*
2515 * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
2516 * performs a kind of fused op-then-accumulate using a helper
2517 * function that takes all of rd, rn and the scalar at once.
2518 */
2519 TCGv_i32 scalar, rn, rd;
2520 int pass;
2521
2522 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2523 return false;
2524 }
2525
2526 if (!dc_isar_feature(aa32_rdm, s)) {
2527 return false;
2528 }
2529
2530 /* UNDEF accesses to D16-D31 if they don't exist. */
2531 if (!dc_isar_feature(aa32_simd_r32, s) &&
2532 ((a->vd | a->vn | a->vm) & 0x10)) {
2533 return false;
2534 }
2535
2536 if (!opfn) {
2537 /* Bad size (including size == 3, which is a different insn group) */
2538 return false;
2539 }
2540
2541 if (a->q && ((a->vd | a->vn) & 1)) {
2542 return false;
2543 }
2544
2545 if (!vfp_access_check(s)) {
2546 return true;
2547 }
2548
2549 scalar = neon_get_scalar(a->size, a->vm);
2550 rn = tcg_temp_new_i32();
2551 rd = tcg_temp_new_i32();
2552
2553 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2554 read_neon_element32(rn, a->vn, pass, MO_32);
2555 read_neon_element32(rd, a->vd, pass, MO_32);
2556 opfn(rd, cpu_env, rn, scalar, rd);
2557 write_neon_element32(rd, a->vd, pass, MO_32);
2558 }
2559 return true;
2560 }
2561
2562 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2563 {
2564 static NeonGenThreeOpEnvFn *opfn[] = {
2565 NULL,
2566 gen_helper_neon_qrdmlah_s16,
2567 gen_helper_neon_qrdmlah_s32,
2568 NULL,
2569 };
2570 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2571 }
2572
2573 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2574 {
2575 static NeonGenThreeOpEnvFn *opfn[] = {
2576 NULL,
2577 gen_helper_neon_qrdmlsh_s16,
2578 gen_helper_neon_qrdmlsh_s32,
2579 NULL,
2580 };
2581 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2582 }
2583
2584 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
2585 NeonGenTwoOpWidenFn *opfn,
2586 NeonGenTwo64OpFn *accfn)
2587 {
2588 /*
2589 * Two registers and a scalar, long operations: perform an
2590 * operation on the input elements and the scalar which produces
2591 * a double-width result, and then possibly perform an accumulation
2592 * operation of that result into the destination.
2593 */
2594 TCGv_i32 scalar, rn;
2595 TCGv_i64 rn0_64, rn1_64;
2596
2597 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2598 return false;
2599 }
2600
2601 /* UNDEF accesses to D16-D31 if they don't exist. */
2602 if (!dc_isar_feature(aa32_simd_r32, s) &&
2603 ((a->vd | a->vn | a->vm) & 0x10)) {
2604 return false;
2605 }
2606
2607 if (!opfn) {
2608 /* Bad size (including size == 3, which is a different insn group) */
2609 return false;
2610 }
2611
2612 if (a->vd & 1) {
2613 return false;
2614 }
2615
2616 if (!vfp_access_check(s)) {
2617 return true;
2618 }
2619
2620 scalar = neon_get_scalar(a->size, a->vm);
2621
2622 /* Load all inputs before writing any outputs, in case of overlap */
2623 rn = tcg_temp_new_i32();
2624 read_neon_element32(rn, a->vn, 0, MO_32);
2625 rn0_64 = tcg_temp_new_i64();
2626 opfn(rn0_64, rn, scalar);
2627
2628 read_neon_element32(rn, a->vn, 1, MO_32);
2629 rn1_64 = tcg_temp_new_i64();
2630 opfn(rn1_64, rn, scalar);
2631
2632 if (accfn) {
2633 TCGv_i64 t64 = tcg_temp_new_i64();
2634 read_neon_element64(t64, a->vd, 0, MO_64);
2635 accfn(rn0_64, t64, rn0_64);
2636 read_neon_element64(t64, a->vd, 1, MO_64);
2637 accfn(rn1_64, t64, rn1_64);
2638 }
2639
2640 write_neon_element64(rn0_64, a->vd, 0, MO_64);
2641 write_neon_element64(rn1_64, a->vd, 1, MO_64);
2642 return true;
2643 }
2644
2645 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2646 {
2647 static NeonGenTwoOpWidenFn * const opfn[] = {
2648 NULL,
2649 gen_helper_neon_mull_s16,
2650 gen_mull_s32,
2651 NULL,
2652 };
2653
2654 return do_2scalar_long(s, a, opfn[a->size], NULL);
2655 }
2656
2657 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2658 {
2659 static NeonGenTwoOpWidenFn * const opfn[] = {
2660 NULL,
2661 gen_helper_neon_mull_u16,
2662 gen_mull_u32,
2663 NULL,
2664 };
2665
2666 return do_2scalar_long(s, a, opfn[a->size], NULL);
2667 }
2668
2669 #define DO_VMLAL_2SC(INSN, MULL, ACC) \
2670 static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
2671 { \
2672 static NeonGenTwoOpWidenFn * const opfn[] = { \
2673 NULL, \
2674 gen_helper_neon_##MULL##16, \
2675 gen_##MULL##32, \
2676 NULL, \
2677 }; \
2678 static NeonGenTwo64OpFn * const accfn[] = { \
2679 NULL, \
2680 gen_helper_neon_##ACC##l_u32, \
2681 tcg_gen_##ACC##_i64, \
2682 NULL, \
2683 }; \
2684 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
2685 }
2686
2687 DO_VMLAL_2SC(VMLAL_S, mull_s, add)
2688 DO_VMLAL_2SC(VMLAL_U, mull_u, add)
2689 DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
2690 DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2691
2692 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2693 {
2694 static NeonGenTwoOpWidenFn * const opfn[] = {
2695 NULL,
2696 gen_VQDMULL_16,
2697 gen_VQDMULL_32,
2698 NULL,
2699 };
2700
2701 return do_2scalar_long(s, a, opfn[a->size], NULL);
2702 }
2703
2704 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2705 {
2706 static NeonGenTwoOpWidenFn * const opfn[] = {
2707 NULL,
2708 gen_VQDMULL_16,
2709 gen_VQDMULL_32,
2710 NULL,
2711 };
2712 static NeonGenTwo64OpFn * const accfn[] = {
2713 NULL,
2714 gen_VQDMLAL_acc_16,
2715 gen_VQDMLAL_acc_32,
2716 NULL,
2717 };
2718
2719 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2720 }
2721
2722 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2723 {
2724 static NeonGenTwoOpWidenFn * const opfn[] = {
2725 NULL,
2726 gen_VQDMULL_16,
2727 gen_VQDMULL_32,
2728 NULL,
2729 };
2730 static NeonGenTwo64OpFn * const accfn[] = {
2731 NULL,
2732 gen_VQDMLSL_acc_16,
2733 gen_VQDMLSL_acc_32,
2734 NULL,
2735 };
2736
2737 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2738 }
2739
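/*
 * VEXT extracts a contiguous block of bytes from the concatenation
 * <Vm:Vn>, starting at byte index imm: in effect Vd = (Vm:Vn) >> (imm * 8).
 * tcg_gen_extract2_i64() yields exactly such a 64-bit window from a
 * (high:low) pair, so the D-reg form needs one extract2 and the Q-reg
 * form two, with the source doublewords chosen according to whether
 * imm crosses the 8-byte boundary.
 */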
2740 static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
2741 {
2742 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2743 return false;
2744 }
2745
2746 /* UNDEF accesses to D16-D31 if they don't exist. */
2747 if (!dc_isar_feature(aa32_simd_r32, s) &&
2748 ((a->vd | a->vn | a->vm) & 0x10)) {
2749 return false;
2750 }
2751
2752 if ((a->vn | a->vm | a->vd) & a->q) {
2753 return false;
2754 }
2755
2756 if (a->imm > 7 && !a->q) {
2757 return false;
2758 }
2759
2760 if (!vfp_access_check(s)) {
2761 return true;
2762 }
2763
2764 if (!a->q) {
2765 /* Extract 64 bits from <Vm:Vn> */
2766 TCGv_i64 left, right, dest;
2767
2768 left = tcg_temp_new_i64();
2769 right = tcg_temp_new_i64();
2770 dest = tcg_temp_new_i64();
2771
2772 read_neon_element64(right, a->vn, 0, MO_64);
2773 read_neon_element64(left, a->vm, 0, MO_64);
2774 tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
2775 write_neon_element64(dest, a->vd, 0, MO_64);
2776 } else {
2777 /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
2778 TCGv_i64 left, middle, right, destleft, destright;
2779
2780 left = tcg_temp_new_i64();
2781 middle = tcg_temp_new_i64();
2782 right = tcg_temp_new_i64();
2783 destleft = tcg_temp_new_i64();
2784 destright = tcg_temp_new_i64();
2785
2786 if (a->imm < 8) {
2787 read_neon_element64(right, a->vn, 0, MO_64);
2788 read_neon_element64(middle, a->vn, 1, MO_64);
2789 tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
2790 read_neon_element64(left, a->vm, 0, MO_64);
2791 tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
2792 } else {
2793 read_neon_element64(right, a->vn, 1, MO_64);
2794 read_neon_element64(middle, a->vm, 0, MO_64);
2795 tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
2796 read_neon_element64(left, a->vm, 1, MO_64);
2797 tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
2798 }
2799
2800 write_neon_element64(destright, a->vd, 0, MO_64);
2801 write_neon_element64(destleft, a->vd, 1, MO_64);
2802 }
2803 return true;
2804 }
2805
2806 static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
2807 {
2808 TCGv_i64 val, def;
2809 TCGv_i32 desc;
2810
2811 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2812 return false;
2813 }
2814
2815 /* UNDEF accesses to D16-D31 if they don't exist. */
2816 if (!dc_isar_feature(aa32_simd_r32, s) &&
2817 ((a->vd | a->vn | a->vm) & 0x10)) {
2818 return false;
2819 }
2820
2821 if ((a->vn + a->len + 1) > 32) {
2822 /*
2823 * This is UNPREDICTABLE; we choose to UNDEF to avoid the
2824 * helper function running off the end of the register file.
2825 */
2826 return false;
2827 }
2828
2829 if (!vfp_access_check(s)) {
2830 return true;
2831 }
2832
2833 desc = tcg_constant_i32((a->vn << 2) | a->len);
2834 def = tcg_temp_new_i64();
2835 if (a->op) {
2836 read_neon_element64(def, a->vd, 0, MO_64);
2837 } else {
2838 tcg_gen_movi_i64(def, 0);
2839 }
2840 val = tcg_temp_new_i64();
2841 read_neon_element64(val, a->vm, 0, MO_64);
2842
2843 gen_helper_neon_tbl(val, cpu_env, desc, val, def);
2844 write_neon_element64(val, a->vd, 0, MO_64);
2845 return true;
2846 }
2847
2848 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
2849 {
2850 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2851 return false;
2852 }
2853
2854 /* UNDEF accesses to D16-D31 if they don't exist. */
2855 if (!dc_isar_feature(aa32_simd_r32, s) &&
2856 ((a->vd | a->vm) & 0x10)) {
2857 return false;
2858 }
2859
2860 if (a->vd & a->q) {
2861 return false;
2862 }
2863
2864 if (!vfp_access_check(s)) {
2865 return true;
2866 }
2867
2868 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
2869 neon_element_offset(a->vm, a->index, a->size),
2870 a->q ? 16 : 8, a->q ? 16 : 8);
2871 return true;
2872 }
2873
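/*
 * VREV64 reverses the order of the elements within each doubleword.
 * Working 32 bits at a time, that is a swap of the two words of each
 * doubleword plus, for the smaller element sizes, a byte or halfword
 * reversal within each word (size 0 and 1 below); for 32-bit elements
 * the word swap alone is enough.
 */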
2874 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
2875 {
2876 int pass, half;
2877 TCGv_i32 tmp[2];
2878
2879 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2880 return false;
2881 }
2882
2883 /* UNDEF accesses to D16-D31 if they don't exist. */
2884 if (!dc_isar_feature(aa32_simd_r32, s) &&
2885 ((a->vd | a->vm) & 0x10)) {
2886 return false;
2887 }
2888
2889 if ((a->vd | a->vm) & a->q) {
2890 return false;
2891 }
2892
2893 if (a->size == 3) {
2894 return false;
2895 }
2896
2897 if (!vfp_access_check(s)) {
2898 return true;
2899 }
2900
2901 tmp[0] = tcg_temp_new_i32();
2902 tmp[1] = tcg_temp_new_i32();
2903
2904 for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
2905 for (half = 0; half < 2; half++) {
2906 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
2907 switch (a->size) {
2908 case 0:
2909 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
2910 break;
2911 case 1:
2912 gen_swap_half(tmp[half], tmp[half]);
2913 break;
2914 case 2:
2915 break;
2916 default:
2917 g_assert_not_reached();
2918 }
2919 }
2920 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
2921 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
2922 }
2923 return true;
2924 }
2925
2926 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
2927 NeonGenWidenFn *widenfn,
2928 NeonGenTwo64OpFn *opfn,
2929 NeonGenTwo64OpFn *accfn)
2930 {
2931 /*
2932 * Pairwise long operations: widen both halves of the pair,
2933 * combine the pairs with the opfn, and then possibly accumulate
2934 * into the destination with the accfn.
2935 */
2936 int pass;
2937
2938 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2939 return false;
2940 }
2941
2942 /* UNDEF accesses to D16-D31 if they don't exist. */
2943 if (!dc_isar_feature(aa32_simd_r32, s) &&
2944 ((a->vd | a->vm) & 0x10)) {
2945 return false;
2946 }
2947
2948 if ((a->vd | a->vm) & a->q) {
2949 return false;
2950 }
2951
2952 if (!widenfn) {
2953 return false;
2954 }
2955
2956 if (!vfp_access_check(s)) {
2957 return true;
2958 }
2959
2960 for (pass = 0; pass < a->q + 1; pass++) {
2961 TCGv_i32 tmp;
2962 TCGv_i64 rm0_64, rm1_64, rd_64;
2963
2964 rm0_64 = tcg_temp_new_i64();
2965 rm1_64 = tcg_temp_new_i64();
2966 rd_64 = tcg_temp_new_i64();
2967
2968 tmp = tcg_temp_new_i32();
2969 read_neon_element32(tmp, a->vm, pass * 2, MO_32);
2970 widenfn(rm0_64, tmp);
2971 read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
2972 widenfn(rm1_64, tmp);
2973
2974 opfn(rd_64, rm0_64, rm1_64);
2975
2976 if (accfn) {
2977 TCGv_i64 tmp64 = tcg_temp_new_i64();
2978 read_neon_element64(tmp64, a->vd, pass, MO_64);
2979 accfn(rd_64, tmp64, rd_64);
2980 }
2981 write_neon_element64(rd_64, a->vd, pass, MO_64);
2982 }
2983 return true;
2984 }
2985
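/*
 * VPADDL: e.g. VPADDL.S8 sign-extends each byte and sums adjacent
 * pairs into halfword results; VPADAL additionally accumulates those
 * sums into the existing destination elements.
 */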
2986 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
2987 {
2988 static NeonGenWidenFn * const widenfn[] = {
2989 gen_helper_neon_widen_s8,
2990 gen_helper_neon_widen_s16,
2991 tcg_gen_ext_i32_i64,
2992 NULL,
2993 };
2994 static NeonGenTwo64OpFn * const opfn[] = {
2995 gen_helper_neon_paddl_u16,
2996 gen_helper_neon_paddl_u32,
2997 tcg_gen_add_i64,
2998 NULL,
2999 };
3000
3001 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3002 }
3003
3004 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
3005 {
3006 static NeonGenWidenFn * const widenfn[] = {
3007 gen_helper_neon_widen_u8,
3008 gen_helper_neon_widen_u16,
3009 tcg_gen_extu_i32_i64,
3010 NULL,
3011 };
3012 static NeonGenTwo64OpFn * const opfn[] = {
3013 gen_helper_neon_paddl_u16,
3014 gen_helper_neon_paddl_u32,
3015 tcg_gen_add_i64,
3016 NULL,
3017 };
3018
3019 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3020 }
3021
3022 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
3023 {
3024 static NeonGenWidenFn * const widenfn[] = {
3025 gen_helper_neon_widen_s8,
3026 gen_helper_neon_widen_s16,
3027 tcg_gen_ext_i32_i64,
3028 NULL,
3029 };
3030 static NeonGenTwo64OpFn * const opfn[] = {
3031 gen_helper_neon_paddl_u16,
3032 gen_helper_neon_paddl_u32,
3033 tcg_gen_add_i64,
3034 NULL,
3035 };
3036 static NeonGenTwo64OpFn * const accfn[] = {
3037 gen_helper_neon_addl_u16,
3038 gen_helper_neon_addl_u32,
3039 tcg_gen_add_i64,
3040 NULL,
3041 };
3042
3043 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3044 accfn[a->size]);
3045 }
3046
3047 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
3048 {
3049 static NeonGenWidenFn * const widenfn[] = {
3050 gen_helper_neon_widen_u8,
3051 gen_helper_neon_widen_u16,
3052 tcg_gen_extu_i32_i64,
3053 NULL,
3054 };
3055 static NeonGenTwo64OpFn * const opfn[] = {
3056 gen_helper_neon_paddl_u16,
3057 gen_helper_neon_paddl_u32,
3058 tcg_gen_add_i64,
3059 NULL,
3060 };
3061 static NeonGenTwo64OpFn * const accfn[] = {
3062 gen_helper_neon_addl_u16,
3063 gen_helper_neon_addl_u32,
3064 tcg_gen_add_i64,
3065 NULL,
3066 };
3067
3068 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3069 accfn[a->size]);
3070 }
3071
3072 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
3073
3074 static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
3075 ZipFn *fn)
3076 {
3077 TCGv_ptr pd, pm;
3078
3079 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3080 return false;
3081 }
3082
3083 /* UNDEF accesses to D16-D31 if they don't exist. */
3084 if (!dc_isar_feature(aa32_simd_r32, s) &&
3085 ((a->vd | a->vm) & 0x10)) {
3086 return false;
3087 }
3088
3089 if ((a->vd | a->vm) & a->q) {
3090 return false;
3091 }
3092
3093 if (!fn) {
3094 /* Bad size or size/q combination */
3095 return false;
3096 }
3097
3098 if (!vfp_access_check(s)) {
3099 return true;
3100 }
3101
3102 pd = vfp_reg_ptr(true, a->vd);
3103 pm = vfp_reg_ptr(true, a->vm);
3104 fn(pd, pm);
3105 return true;
3106 }
3107
3108 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3109 {
3110 static ZipFn * const fn[2][4] = {
3111 {
3112 gen_helper_neon_unzip8,
3113 gen_helper_neon_unzip16,
3114 NULL,
3115 NULL,
3116 }, {
3117 gen_helper_neon_qunzip8,
3118 gen_helper_neon_qunzip16,
3119 gen_helper_neon_qunzip32,
3120 NULL,
3121 }
3122 };
3123 return do_zip_uzp(s, a, fn[a->q][a->size]);
3124 }
3125
3126 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3127 {
3128 static ZipFn * const fn[2][4] = {
3129 {
3130 gen_helper_neon_zip8,
3131 gen_helper_neon_zip16,
3132 NULL,
3133 NULL,
3134 }, {
3135 gen_helper_neon_qzip8,
3136 gen_helper_neon_qzip16,
3137 gen_helper_neon_qzip32,
3138 NULL,
3139 }
3140 };
3141 return do_zip_uzp(s, a, fn[a->q][a->size]);
3142 }
3143
3144 static bool do_vmovn(DisasContext *s, arg_2misc *a,
3145 NeonGenNarrowEnvFn *narrowfn)
3146 {
3147 TCGv_i64 rm;
3148 TCGv_i32 rd0, rd1;
3149
3150 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3151 return false;
3152 }
3153
3154 /* UNDEF accesses to D16-D31 if they don't exist. */
3155 if (!dc_isar_feature(aa32_simd_r32, s) &&
3156 ((a->vd | a->vm) & 0x10)) {
3157 return false;
3158 }
3159
3160 if (a->vm & 1) {
3161 return false;
3162 }
3163
3164 if (!narrowfn) {
3165 return false;
3166 }
3167
3168 if (!vfp_access_check(s)) {
3169 return true;
3170 }
3171
3172 rm = tcg_temp_new_i64();
3173 rd0 = tcg_temp_new_i32();
3174 rd1 = tcg_temp_new_i32();
3175
3176 read_neon_element64(rm, a->vm, 0, MO_64);
3177 narrowfn(rd0, cpu_env, rm);
3178 read_neon_element64(rm, a->vm, 1, MO_64);
3179 narrowfn(rd1, cpu_env, rm);
3180 write_neon_element32(rd0, a->vd, 0, MO_32);
3181 write_neon_element32(rd1, a->vd, 1, MO_32);
3182 return true;
3183 }
3184
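/*
 * Narrowing moves: plain VMOVN simply truncates each element, while
 * the VQMOVN/VQMOVUN forms saturate and may set the cumulative
 * saturation flag FPSCR.QC, which is why the narrowing functions use
 * the env-taking signature.
 */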
3185 #define DO_VMOVN(INSN, FUNC) \
3186 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3187 { \
3188 static NeonGenNarrowEnvFn * const narrowfn[] = { \
3189 FUNC##8, \
3190 FUNC##16, \
3191 FUNC##32, \
3192 NULL, \
3193 }; \
3194 return do_vmovn(s, a, narrowfn[a->size]); \
3195 }
3196
3197 DO_VMOVN(VMOVN, gen_neon_narrow_u)
3198 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
3199 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
3200 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3201
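/*
 * This is the 2-reg-misc VSHLL, where the shift count is implicitly
 * the source element width: each element is widened and then shifted
 * left by 8 << size bits, so the extension bits are shifted out
 * entirely and the unsigned widen functions can be used for both
 * signed and unsigned datatypes.
 */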
3202 static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
3203 {
3204 TCGv_i32 rm0, rm1;
3205 TCGv_i64 rd;
3206 static NeonGenWidenFn * const widenfns[] = {
3207 gen_helper_neon_widen_u8,
3208 gen_helper_neon_widen_u16,
3209 tcg_gen_extu_i32_i64,
3210 NULL,
3211 };
3212 NeonGenWidenFn *widenfn = widenfns[a->size];
3213
3214 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3215 return false;
3216 }
3217
3218 /* UNDEF accesses to D16-D31 if they don't exist. */
3219 if (!dc_isar_feature(aa32_simd_r32, s) &&
3220 ((a->vd | a->vm) & 0x10)) {
3221 return false;
3222 }
3223
3224 if (a->vd & 1) {
3225 return false;
3226 }
3227
3228 if (!widenfn) {
3229 return false;
3230 }
3231
3232 if (!vfp_access_check(s)) {
3233 return true;
3234 }
3235
3236 rd = tcg_temp_new_i64();
3237 rm0 = tcg_temp_new_i32();
3238 rm1 = tcg_temp_new_i32();
3239
3240 read_neon_element32(rm0, a->vm, 0, MO_32);
3241 read_neon_element32(rm1, a->vm, 1, MO_32);
3242
3243 widenfn(rd, rm0);
3244 tcg_gen_shli_i64(rd, rd, 8 << a->size);
3245 write_neon_element64(rd, a->vd, 0, MO_64);
3246 widenfn(rd, rm1);
3247 tcg_gen_shli_i64(rd, rd, 8 << a->size);
3248 write_neon_element64(rd, a->vd, 1, MO_64);
3249 return true;
3250 }
3251
3252 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
3253 {
3254 TCGv_ptr fpst;
3255 TCGv_i64 tmp;
3256 TCGv_i32 dst0, dst1;
3257
3258 if (!dc_isar_feature(aa32_bf16, s)) {
3259 return false;
3260 }
3261
3262 /* UNDEF accesses to D16-D31 if they don't exist. */
3263 if (!dc_isar_feature(aa32_simd_r32, s) &&
3264 ((a->vd | a->vm) & 0x10)) {
3265 return false;
3266 }
3267
3268 if ((a->vm & 1) || (a->size != 1)) {
3269 return false;
3270 }
3271
3272 if (!vfp_access_check(s)) {
3273 return true;
3274 }
3275
3276 fpst = fpstatus_ptr(FPST_STD);
3277 tmp = tcg_temp_new_i64();
3278 dst0 = tcg_temp_new_i32();
3279 dst1 = tcg_temp_new_i32();
3280
3281 read_neon_element64(tmp, a->vm, 0, MO_64);
3282 gen_helper_bfcvt_pair(dst0, tmp, fpst);
3283
3284 read_neon_element64(tmp, a->vm, 1, MO_64);
3285 gen_helper_bfcvt_pair(dst1, tmp, fpst);
3286
3287 write_neon_element32(dst0, a->vd, 0, MO_32);
3288 write_neon_element32(dst1, a->vd, 1, MO_32);
3289 return true;
3290 }
3291
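/*
 * VCVT between half and single precision converts four lanes between a
 * D register of f16 values and a Q register of f32 values.  The reads
 * and writes below are ordered so that all source elements are read
 * before the first destination element is written, since the
 * destination may overlap the source; the AHP flag is passed to the
 * conversion helpers so the alternative half-precision format is
 * honoured.
 */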
3292 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
3293 {
3294 TCGv_ptr fpst;
3295 TCGv_i32 ahp, tmp, tmp2, tmp3;
3296
3297 if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3298 !dc_isar_feature(aa32_fp16_spconv, s)) {
3299 return false;
3300 }
3301
3302 /* UNDEF accesses to D16-D31 if they don't exist. */
3303 if (!dc_isar_feature(aa32_simd_r32, s) &&
3304 ((a->vd | a->vm) & 0x10)) {
3305 return false;
3306 }
3307
3308 if ((a->vm & 1) || (a->size != 1)) {
3309 return false;
3310 }
3311
3312 if (!vfp_access_check(s)) {
3313 return true;
3314 }
3315
3316 fpst = fpstatus_ptr(FPST_STD);
3317 ahp = get_ahp_flag();
3318 tmp = tcg_temp_new_i32();
3319 read_neon_element32(tmp, a->vm, 0, MO_32);
3320 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3321 tmp2 = tcg_temp_new_i32();
3322 read_neon_element32(tmp2, a->vm, 1, MO_32);
3323 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
3324 tcg_gen_shli_i32(tmp2, tmp2, 16);
3325 tcg_gen_or_i32(tmp2, tmp2, tmp);
3326 read_neon_element32(tmp, a->vm, 2, MO_32);
3327 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3328 tmp3 = tcg_temp_new_i32();
3329 read_neon_element32(tmp3, a->vm, 3, MO_32);
3330 write_neon_element32(tmp2, a->vd, 0, MO_32);
3331 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
3332 tcg_gen_shli_i32(tmp3, tmp3, 16);
3333 tcg_gen_or_i32(tmp3, tmp3, tmp);
3334 write_neon_element32(tmp3, a->vd, 1, MO_32);
3335 return true;
3336 }
3337
3338 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
3339 {
3340 TCGv_ptr fpst;
3341 TCGv_i32 ahp, tmp, tmp2, tmp3;
3342
3343 if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3344 !dc_isar_feature(aa32_fp16_spconv, s)) {
3345 return false;
3346 }
3347
3348 /* UNDEF accesses to D16-D31 if they don't exist. */
3349 if (!dc_isar_feature(aa32_simd_r32, s) &&
3350 ((a->vd | a->vm) & 0x10)) {
3351 return false;
3352 }
3353
3354 if ((a->vd & 1) || (a->size != 1)) {
3355 return false;
3356 }
3357
3358 if (!vfp_access_check(s)) {
3359 return true;
3360 }
3361
3362 fpst = fpstatus_ptr(FPST_STD);
3363 ahp = get_ahp_flag();
3364 tmp3 = tcg_temp_new_i32();
3365 tmp2 = tcg_temp_new_i32();
3366 tmp = tcg_temp_new_i32();
3367 read_neon_element32(tmp, a->vm, 0, MO_32);
3368 read_neon_element32(tmp2, a->vm, 1, MO_32);
3369 tcg_gen_ext16u_i32(tmp3, tmp);
3370 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3371 write_neon_element32(tmp3, a->vd, 0, MO_32);
3372 tcg_gen_shri_i32(tmp, tmp, 16);
3373 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
3374 write_neon_element32(tmp, a->vd, 1, MO_32);
3375 tcg_gen_ext16u_i32(tmp3, tmp2);
3376 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3377 write_neon_element32(tmp3, a->vd, 2, MO_32);
3378 tcg_gen_shri_i32(tmp2, tmp2, 16);
3379 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
3380 write_neon_element32(tmp2, a->vd, 3, MO_32);
3381 return true;
3382 }
3383
3384 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
3385 {
3386 int vec_size = a->q ? 16 : 8;
3387 int rd_ofs = neon_full_reg_offset(a->vd);
3388 int rm_ofs = neon_full_reg_offset(a->vm);
3389
3390 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3391 return false;
3392 }
3393
3394 /* UNDEF accesses to D16-D31 if they don't exist. */
3395 if (!dc_isar_feature(aa32_simd_r32, s) &&
3396 ((a->vd | a->vm) & 0x10)) {
3397 return false;
3398 }
3399
3400 if (a->size == 3) {
3401 return false;
3402 }
3403
3404 if ((a->vd | a->vm) & a->q) {
3405 return false;
3406 }
3407
3408 if (!vfp_access_check(s)) {
3409 return true;
3410 }
3411
3412 fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
3413
3414 return true;
3415 }
3416
3417 #define DO_2MISC_VEC(INSN, FN) \
3418 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3419 { \
3420 return do_2misc_vec(s, a, FN); \
3421 }
3422
3423 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
3424 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
3425 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
3426 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
3427 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
3428 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
3429 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3430
3431 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3432 {
3433 if (a->size != 0) {
3434 return false;
3435 }
3436 return do_2misc_vec(s, a, tcg_gen_gvec_not);
3437 }
3438
3439 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
3440 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
3441 uint32_t rm_ofs, uint32_t oprsz, \
3442 uint32_t maxsz) \
3443 { \
3444 tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \
3445 DATA, FUNC); \
3446 }
3447
3448 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
3449 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
3450 uint32_t rm_ofs, uint32_t oprsz, \
3451 uint32_t maxsz) \
3452 { \
3453 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \
3454 }
3455
3456 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
3457 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
3458 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
3459 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
3460 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
3461 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
3462 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3463
3464 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
3465 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3466 { \
3467 if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
3468 return false; \
3469 } \
3470 return do_2misc_vec(s, a, gen_##INSN); \
3471 }
3472
3473 DO_2M_CRYPTO(AESE, aa32_aes, 0)
3474 DO_2M_CRYPTO(AESD, aa32_aes, 0)
3475 DO_2M_CRYPTO(AESMC, aa32_aes, 0)
3476 DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
3477 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
3478 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
3479 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3480
3481 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
3482 {
3483 TCGv_i32 tmp;
3484 int pass;
3485
3486 /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
3487 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3488 return false;
3489 }
3490
3491 /* UNDEF accesses to D16-D31 if they don't exist. */
3492 if (!dc_isar_feature(aa32_simd_r32, s) &&
3493 ((a->vd | a->vm) & 0x10)) {
3494 return false;
3495 }
3496
3497 if (!fn) {
3498 return false;
3499 }
3500
3501 if ((a->vd | a->vm) & a->q) {
3502 return false;
3503 }
3504
3505 if (!vfp_access_check(s)) {
3506 return true;
3507 }
3508
3509 tmp = tcg_temp_new_i32();
3510 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3511 read_neon_element32(tmp, a->vm, pass, MO_32);
3512 fn(tmp, tmp);
3513 write_neon_element32(tmp, a->vd, pass, MO_32);
3514 }
3515 return true;
3516 }
3517
3518 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3519 {
3520 static NeonGenOneOpFn * const fn[] = {
3521 tcg_gen_bswap32_i32,
3522 gen_swap_half,
3523 NULL,
3524 NULL,
3525 };
3526 return do_2misc(s, a, fn[a->size]);
3527 }
3528
3529 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3530 {
3531 if (a->size != 0) {
3532 return false;
3533 }
3534 return do_2misc(s, a, gen_rev16);
3535 }
3536
3537 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3538 {
3539 static NeonGenOneOpFn * const fn[] = {
3540 gen_helper_neon_cls_s8,
3541 gen_helper_neon_cls_s16,
3542 gen_helper_neon_cls_s32,
3543 NULL,
3544 };
3545 return do_2misc(s, a, fn[a->size]);
3546 }
3547
3548 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
3549 {
3550 tcg_gen_clzi_i32(rd, rm, 32);
3551 }
3552
3553 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3554 {
3555 static NeonGenOneOpFn * const fn[] = {
3556 gen_helper_neon_clz_u8,
3557 gen_helper_neon_clz_u16,
3558 do_VCLZ_32,
3559 NULL,
3560 };
3561 return do_2misc(s, a, fn[a->size]);
3562 }
3563
3564 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3565 {
3566 if (a->size != 0) {
3567 return false;
3568 }
3569 return do_2misc(s, a, gen_helper_neon_cnt_u8);
3570 }
3571
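/*
 * Float VABS and VNEG only touch the sign bit, so they can be done as
 * plain vector AND/XOR with the appropriate mask and need no fpstatus
 * or helper call.
 */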
3572 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3573 uint32_t oprsz, uint32_t maxsz)
3574 {
3575 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3576 vece == MO_16 ? 0x7fff : 0x7fffffff,
3577 oprsz, maxsz);
3578 }
3579
3580 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3581 {
3582 if (a->size == MO_16) {
3583 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3584 return false;
3585 }
3586 } else if (a->size != MO_32) {
3587 return false;
3588 }
3589 return do_2misc_vec(s, a, gen_VABS_F);
3590 }
3591
3592 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3593 uint32_t oprsz, uint32_t maxsz)
3594 {
3595 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3596 vece == MO_16 ? 0x8000 : 0x80000000,
3597 oprsz, maxsz);
3598 }
3599
3600 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3601 {
3602 if (a->size == MO_16) {
3603 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3604 return false;
3605 }
3606 } else if (a->size != MO_32) {
3607 return false;
3608 }
3609 return do_2misc_vec(s, a, gen_VNEG_F);
3610 }
3611
3612 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3613 {
3614 if (a->size != 2) {
3615 return false;
3616 }
3617 return do_2misc(s, a, gen_helper_recpe_u32);
3618 }
3619
3620 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3621 {
3622 if (a->size != 2) {
3623 return false;
3624 }
3625 return do_2misc(s, a, gen_helper_rsqrte_u32);
3626 }
3627
3628 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
3629 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
3630 { \
3631 FUNC(d, cpu_env, m); \
3632 }
3633
3634 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
3635 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
3636 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
3637 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
3638 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
3639 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3640
3641 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3642 {
3643 static NeonGenOneOpFn * const fn[] = {
3644 gen_VQABS_s8,
3645 gen_VQABS_s16,
3646 gen_VQABS_s32,
3647 NULL,
3648 };
3649 return do_2misc(s, a, fn[a->size]);
3650 }
3651
3652 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3653 {
3654 static NeonGenOneOpFn * const fn[] = {
3655 gen_VQNEG_s8,
3656 gen_VQNEG_s16,
3657 gen_VQNEG_s32,
3658 NULL,
3659 };
3660 return do_2misc(s, a, fn[a->size]);
3661 }
3662
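/*
 * FP two-reg-misc ops act on the whole vector via gvec helpers.  Neon
 * arithmetic uses the "standard FPSCR value", so the fpstatus passed
 * down is FPST_STD (or FPST_STD_F16 for fp16 elements) rather than the
 * status governed by the current FPSCR controls.
 */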
3663 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
3664 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
3665 uint32_t rm_ofs, \
3666 uint32_t oprsz, uint32_t maxsz) \
3667 { \
3668 static gen_helper_gvec_2_ptr * const fns[4] = { \
3669 NULL, HFUNC, SFUNC, NULL, \
3670 }; \
3671 TCGv_ptr fpst; \
3672 fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
3673 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
3674 fns[vece]); \
3675 } \
3676 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3677 { \
3678 if (a->size == MO_16) { \
3679 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
3680 return false; \
3681 } \
3682 } else if (a->size != MO_32) { \
3683 return false; \
3684 } \
3685 return do_2misc_vec(s, a, gen_##INSN); \
3686 }
3687
3688 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
3689 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
3690 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
3691 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
3692 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
3693 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
3694 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
3695 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
3696 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
3697 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
3698 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
3699
3700 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3701
3702 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3703 {
3704 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3705 return false;
3706 }
3707 return trans_VRINTX_impl(s, a);
3708 }
3709
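/*
 * The v8 directed-rounding conversions and roundings (VCVTA/N/P/M,
 * VRINTN/A/Z/M/P) use a fixed rounding mode rather than the one in the
 * FPSCR; it is translated to its softfloat encoding with
 * arm_rmode_to_sf() and handed to the helper as the gvec data value.
 */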
3710 #define DO_VEC_RMODE(INSN, RMODE, OP) \
3711 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
3712 uint32_t rm_ofs, \
3713 uint32_t oprsz, uint32_t maxsz) \
3714 { \
3715 static gen_helper_gvec_2_ptr * const fns[4] = { \
3716 NULL, \
3717 gen_helper_gvec_##OP##h, \
3718 gen_helper_gvec_##OP##s, \
3719 NULL, \
3720 }; \
3721 TCGv_ptr fpst; \
3722         fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
3723 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
3724 arm_rmode_to_sf(RMODE), fns[vece]); \
3725 } \
3726 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
3727 { \
3728 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
3729 return false; \
3730 } \
3731 if (a->size == MO_16) { \
3732 if (!dc_isar_feature(aa32_fp16_arith, s)) { \
3733 return false; \
3734 } \
3735 } else if (a->size != MO_32) { \
3736 return false; \
3737 } \
3738 return do_2misc_vec(s, a, gen_##INSN); \
3739 }
3740
3741 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
3742 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
3743 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
3744 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
3745 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
3746 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
3747 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
3748 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
3749
3750 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
3751 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
3752 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
3753 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
3754 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3755
3756 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3757 {
3758 TCGv_i64 rm, rd;
3759 int pass;
3760
3761 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3762 return false;
3763 }
3764
3765 /* UNDEF accesses to D16-D31 if they don't exist. */
3766 if (!dc_isar_feature(aa32_simd_r32, s) &&
3767 ((a->vd | a->vm) & 0x10)) {
3768 return false;
3769 }
3770
3771 if (a->size != 0) {
3772 return false;
3773 }
3774
3775 if ((a->vd | a->vm) & a->q) {
3776 return false;
3777 }
3778
3779 if (!vfp_access_check(s)) {
3780 return true;
3781 }
3782
3783 rm = tcg_temp_new_i64();
3784 rd = tcg_temp_new_i64();
3785 for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3786 read_neon_element64(rm, a->vm, pass, MO_64);
3787 read_neon_element64(rd, a->vd, pass, MO_64);
3788 write_neon_element64(rm, a->vd, pass, MO_64);
3789 write_neon_element64(rd, a->vm, pass, MO_64);
3790 }
3791 return true;
3792 }
3793
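/*
 * VTRN treats corresponding pairs of elements in Vd and Vm as 2x2
 * matrices and transposes them, i.e. the odd-numbered elements of Vd
 * are exchanged with the even-numbered elements of Vm.  For 8- and
 * 16-bit elements this is done 32 bits at a time with the shift/mask
 * helpers below; for 32-bit elements it is a straight element exchange.
 */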
3794 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3795 {
3796 TCGv_i32 rd, tmp;
3797
3798 rd = tcg_temp_new_i32();
3799 tmp = tcg_temp_new_i32();
3800
3801 tcg_gen_shli_i32(rd, t0, 8);
3802 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3803 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3804 tcg_gen_or_i32(rd, rd, tmp);
3805
3806 tcg_gen_shri_i32(t1, t1, 8);
3807 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3808 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3809 tcg_gen_or_i32(t1, t1, tmp);
3810 tcg_gen_mov_i32(t0, rd);
3811 }
3812
3813 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3814 {
3815 TCGv_i32 rd, tmp;
3816
3817 rd = tcg_temp_new_i32();
3818 tmp = tcg_temp_new_i32();
3819
3820 tcg_gen_shli_i32(rd, t0, 16);
3821 tcg_gen_andi_i32(tmp, t1, 0xffff);
3822 tcg_gen_or_i32(rd, rd, tmp);
3823 tcg_gen_shri_i32(t1, t1, 16);
3824 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3825 tcg_gen_or_i32(t1, t1, tmp);
3826 tcg_gen_mov_i32(t0, rd);
3827 }
3828
3829 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3830 {
3831 TCGv_i32 tmp, tmp2;
3832 int pass;
3833
3834 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3835 return false;
3836 }
3837
3838 /* UNDEF accesses to D16-D31 if they don't exist. */
3839 if (!dc_isar_feature(aa32_simd_r32, s) &&
3840 ((a->vd | a->vm) & 0x10)) {
3841 return false;
3842 }
3843
3844 if ((a->vd | a->vm) & a->q) {
3845 return false;
3846 }
3847
3848 if (a->size == 3) {
3849 return false;
3850 }
3851
3852 if (!vfp_access_check(s)) {
3853 return true;
3854 }
3855
3856 tmp = tcg_temp_new_i32();
3857 tmp2 = tcg_temp_new_i32();
3858 if (a->size == MO_32) {
3859 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3860 read_neon_element32(tmp, a->vm, pass, MO_32);
3861 read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3862 write_neon_element32(tmp2, a->vm, pass, MO_32);
3863 write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3864 }
3865 } else {
3866 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3867 read_neon_element32(tmp, a->vm, pass, MO_32);
3868 read_neon_element32(tmp2, a->vd, pass, MO_32);
3869 if (a->size == MO_8) {
3870 gen_neon_trn_u8(tmp, tmp2);
3871 } else {
3872 gen_neon_trn_u16(tmp, tmp2);
3873 }
3874 write_neon_element32(tmp2, a->vm, pass, MO_32);
3875 write_neon_element32(tmp, a->vd, pass, MO_32);
3876 }
3877 }
3878 return true;
3879 }
3880
3881 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
3882 {
3883 if (!dc_isar_feature(aa32_i8mm, s)) {
3884 return false;
3885 }
3886 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3887 gen_helper_gvec_smmla_b);
3888 }
3889
3890 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
3891 {
3892 if (!dc_isar_feature(aa32_i8mm, s)) {
3893 return false;
3894 }
3895 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3896 gen_helper_gvec_ummla_b);
3897 }
3898
3899 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
3900 {
3901 if (!dc_isar_feature(aa32_i8mm, s)) {
3902 return false;
3903 }
3904 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3905 gen_helper_gvec_usmmla_b);
3906 }
3907
3908 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
3909 {
3910 if (!dc_isar_feature(aa32_bf16, s)) {
3911 return false;
3912 }
3913 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3914 gen_helper_gvec_bfmmla);
3915 }
3916
3917 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
3918 {
3919 if (!dc_isar_feature(aa32_bf16, s)) {
3920 return false;
3921 }
3922 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
3923 gen_helper_gvec_bfmlal);
3924 }
3925
3926 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
3927 {
3928 if (!dc_isar_feature(aa32_bf16, s)) {
3929 return false;
3930 }
3931 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
3932 (a->index << 1) | a->q, FPST_STD,
3933 gen_helper_gvec_bfmlal_idx);
3934 }