1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "translate.h"
28 #include "internals.h"
29 #include "qemu/host-utils.h"
30
31 #include "hw/semihosting/semihost.h"
32 #include "exec/gen-icount.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36 #include "exec/log.h"
37
38 #include "trace-tcg.h"
39 #include "translate-a64.h"
40 #include "qemu/atomic128.h"
41
42 static TCGv_i64 cpu_X[32];
43 static TCGv_i64 cpu_pc;
44
45 /* Load/store exclusive handling */
46 static TCGv_i64 cpu_exclusive_high;
47
48 static const char *regnames[] = {
49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
52 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
53 };
54
55 enum a64_shift_type {
56 A64_SHIFT_TYPE_LSL = 0,
57 A64_SHIFT_TYPE_LSR = 1,
58 A64_SHIFT_TYPE_ASR = 2,
59 A64_SHIFT_TYPE_ROR = 3
60 };
61
62 /* Table based decoder typedefs - used when the relevant bits for decode
63 * are too awkwardly scattered across the instruction (eg SIMD).
64 */
65 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
66
67 typedef struct AArch64DecodeTable {
68 uint32_t pattern;
69 uint32_t mask;
70 AArch64DecodeFn *disas_fn;
71 } AArch64DecodeTable;
72
73 /* Function prototype for gen_ functions for calling Neon helpers */
74 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
75 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
76 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
77 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
78 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
79 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
80 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
81 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
82 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
83 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
84 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
85 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
86 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
87 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
88 typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
89
90 /* initialize TCG globals. */
91 void a64_translate_init(void)
92 {
93 int i;
94
95 cpu_pc = tcg_global_mem_new_i64(cpu_env,
96 offsetof(CPUARMState, pc),
97 "pc");
98 for (i = 0; i < 32; i++) {
99 cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
100 offsetof(CPUARMState, xregs[i]),
101 regnames[i]);
102 }
103
104 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
105 offsetof(CPUARMState, exclusive_high), "exclusive_high");
106 }
107
108 static inline int get_a64_user_mem_index(DisasContext *s)
109 {
110 /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
111 * if EL1, access as if EL0; otherwise access at current EL
112 */
113 ARMMMUIdx useridx;
114
115 switch (s->mmu_idx) {
116 case ARMMMUIdx_S12NSE1:
117 useridx = ARMMMUIdx_S12NSE0;
118 break;
119 case ARMMMUIdx_S1SE1:
120 useridx = ARMMMUIdx_S1SE0;
121 break;
122 case ARMMMUIdx_S2NS:
123 g_assert_not_reached();
124 default:
125 useridx = s->mmu_idx;
126 break;
127 }
128 return arm_to_core_mmu_idx(useridx);
129 }
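/*
 * Example (illustrative, not part of the original source): an LDTR
 * executed at EL1 in the non-secure stage-1+2 regime translates with
 * ARMMMUIdx_S12NSE0, i.e. exactly as the equivalent EL0 load would;
 * executed at EL0 it already uses the EL0 regime and falls through
 * the default case unchanged.
 */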
130
131 static void reset_btype(DisasContext *s)
132 {
133 if (s->btype != 0) {
134 TCGv_i32 zero = tcg_const_i32(0);
135 tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
136 tcg_temp_free_i32(zero);
137 s->btype = 0;
138 }
139 }
140
141 static void set_btype(DisasContext *s, int val)
142 {
143 TCGv_i32 tcg_val;
144
145 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
146 tcg_debug_assert(val >= 1 && val <= 3);
147
148 tcg_val = tcg_const_i32(val);
149 tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
150 tcg_temp_free_i32(tcg_val);
151 s->btype = -1;
152 }
153
154 void gen_a64_set_pc_im(uint64_t val)
155 {
156 tcg_gen_movi_i64(cpu_pc, val);
157 }
158
159 /*
160 * Handle Top Byte Ignore (TBI) bits.
161 *
162 * If address tagging is enabled via the TCR TBI bits:
163 * + for EL2 and EL3 there is only one TBI bit, and if it is set
164 * then the address is zero-extended, clearing bits [63:56]
165 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
166 * and TBI1 controls addresses with bit 55 == 1.
167 * If the appropriate TBI bit is set for the address then
168 * the address is sign-extended from bit 55 into bits [63:56]
169 *
170 * Here we have concatenated TBI{1,0} into tbi.
171 */
172 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
173 TCGv_i64 src, int tbi)
174 {
175 if (tbi == 0) {
176 /* Load unmodified address */
177 tcg_gen_mov_i64(dst, src);
178 } else if (s->current_el >= 2) {
179 /* FIXME: ARMv8.1-VHE S2 translation regime. */
180 /* Force tag byte to all zero */
181 tcg_gen_extract_i64(dst, src, 0, 56);
182 } else {
183 /* Sign-extend from bit 55. */
184 tcg_gen_sextract_i64(dst, src, 0, 56);
185
186 if (tbi != 3) {
187 TCGv_i64 tcg_zero = tcg_const_i64(0);
188
189 /*
190 * The two TBI bits differ.
191 * If tbi0, then !tbi1: only use the extension if positive.
192 * if !tbi0, then tbi1: only use the extension if negative.
193 */
194 tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
195 dst, dst, tcg_zero, dst, src);
196 tcg_temp_free_i64(tcg_zero);
197 }
198 }
199 }
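/*
 * Worked example (illustrative): with tbi == 3 (TBI0 and TBI1 both
 * set) the tag byte is always replaced by copies of bit 55:
 *
 *   src = 0xAB00123456789ABC  (bit 55 == 0)  ->  dst = 0x0000123456789ABC
 *   src = 0xAB80123456789ABC  (bit 55 == 1)  ->  dst = 0xFF80123456789ABC
 *
 * With tbi == 1 (TBI0 only) the second address is left unmodified,
 * because the movcond above only keeps the extension when bit 55 == 0.
 */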
200
201 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
202 {
203 /*
204 * If address tagging is enabled for instructions via the TCR TBI bits,
205 * then loading an address into the PC will clear out any tag.
206 */
207 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
208 }
209
210 /*
211 * Return a "clean" address for ADDR according to TBID.
212 * This is always a fresh temporary, as we need to be able to
213 * increment this independently of a dirty write-back address.
214 */
215 static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
216 {
217 TCGv_i64 clean = new_tmp_a64(s);
218 gen_top_byte_ignore(s, clean, addr, s->tbid);
219 return clean;
220 }
221
222 typedef struct DisasCompare64 {
223 TCGCond cond;
224 TCGv_i64 value;
225 } DisasCompare64;
226
227 static void a64_test_cc(DisasCompare64 *c64, int cc)
228 {
229 DisasCompare c32;
230
231 arm_test_cc(&c32, cc);
232
233 /* Sign-extend the 32-bit value so that the GE/LT comparisons work
234 * properly. The NE/EQ comparisons are also fine with this choice. */
235 c64->cond = c32.cond;
236 c64->value = tcg_temp_new_i64();
237 tcg_gen_ext_i32_i64(c64->value, c32.value);
238
239 arm_free_cc(&c32);
240 }
241
242 static void a64_free_cc(DisasCompare64 *c64)
243 {
244 tcg_temp_free_i64(c64->value);
245 }
246
247 static void gen_exception_internal(int excp)
248 {
249 TCGv_i32 tcg_excp = tcg_const_i32(excp);
250
251 assert(excp_is_internal(excp));
252 gen_helper_exception_internal(cpu_env, tcg_excp);
253 tcg_temp_free_i32(tcg_excp);
254 }
255
256 static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
257 {
258 gen_a64_set_pc_im(pc);
259 gen_exception_internal(excp);
260 s->base.is_jmp = DISAS_NORETURN;
261 }
262
263 static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
264 uint32_t syndrome, uint32_t target_el)
265 {
266 gen_a64_set_pc_im(pc);
267 gen_exception(excp, syndrome, target_el);
268 s->base.is_jmp = DISAS_NORETURN;
269 }
270
271 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
272 {
273 TCGv_i32 tcg_syn;
274
275 gen_a64_set_pc_im(s->pc_curr);
276 tcg_syn = tcg_const_i32(syndrome);
277 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
278 tcg_temp_free_i32(tcg_syn);
279 s->base.is_jmp = DISAS_NORETURN;
280 }
281
282 static void gen_step_complete_exception(DisasContext *s)
283 {
284 /* We just completed a step of an insn. Move from Active-not-pending
285 * to Active-pending, and then also take the swstep exception.
286 * This corresponds to making the (IMPDEF) choice to prioritize
287 * swstep exceptions over asynchronous exceptions taken to an exception
288 * level where debug is disabled. This choice has the advantage that
289 * we do not need to maintain internal state corresponding to the
290 * ISV/EX syndrome bits between completion of the step and generation
291 * of the exception, and our syndrome information is always correct.
292 */
293 gen_ss_advance(s);
294 gen_swstep_exception(s, 1, s->is_ldex);
295 s->base.is_jmp = DISAS_NORETURN;
296 }
297
298 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
299 {
300 /* No direct tb linking with singlestep (either QEMU's or the ARM
301 * debug architecture kind) or deterministic io
302 */
303 if (s->base.singlestep_enabled || s->ss_active ||
304 (tb_cflags(s->base.tb) & CF_LAST_IO)) {
305 return false;
306 }
307
308 #ifndef CONFIG_USER_ONLY
309 /* Only link tbs from inside the same guest page */
310 if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
311 return false;
312 }
313 #endif
314
315 return true;
316 }
317
318 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
319 {
320 TranslationBlock *tb;
321
322 tb = s->base.tb;
323 if (use_goto_tb(s, n, dest)) {
324 tcg_gen_goto_tb(n);
325 gen_a64_set_pc_im(dest);
326 tcg_gen_exit_tb(tb, n);
327 s->base.is_jmp = DISAS_NORETURN;
328 } else {
329 gen_a64_set_pc_im(dest);
330 if (s->ss_active) {
331 gen_step_complete_exception(s);
332 } else if (s->base.singlestep_enabled) {
333 gen_exception_internal(EXCP_DEBUG);
334 } else {
335 tcg_gen_lookup_and_goto_ptr();
336 s->base.is_jmp = DISAS_NORETURN;
337 }
338 }
339 }
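/*
 * Illustration (assuming 4K guest pages): a B at 0x40001000 targeting
 * 0x40001ffc stays within the TB's page and may be chained with
 * goto_tb, while a B targeting 0x40002000 crosses the page boundary
 * and, in system mode, takes the lookup_and_goto_ptr path instead.
 */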
340
341 static void init_tmp_a64_array(DisasContext *s)
342 {
343 #ifdef CONFIG_DEBUG_TCG
344 memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
345 #endif
346 s->tmp_a64_count = 0;
347 }
348
349 static void free_tmp_a64(DisasContext *s)
350 {
351 int i;
352 for (i = 0; i < s->tmp_a64_count; i++) {
353 tcg_temp_free_i64(s->tmp_a64[i]);
354 }
355 init_tmp_a64_array(s);
356 }
357
358 TCGv_i64 new_tmp_a64(DisasContext *s)
359 {
360 assert(s->tmp_a64_count < TMP_A64_MAX);
361 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
362 }
363
364 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
365 {
366 TCGv_i64 t = new_tmp_a64(s);
367 tcg_gen_movi_i64(t, 0);
368 return t;
369 }
370
371 /*
372 * Register access functions
373 *
374 * These functions are used for directly accessing a register where
375 * changes to the final register value are likely to be made. If you
376 * need to use a register for temporary calculation (e.g. index type
377 * operations) use the read_* form.
378 *
379 * B1.2.1 Register mappings
380 *
381 * In instruction register encoding 31 can refer to ZR (zero register) or
382 * the SP (stack pointer) depending on context. In QEMU's case we map SP
383 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
384 * This is the point of the _sp forms.
385 */
386 TCGv_i64 cpu_reg(DisasContext *s, int reg)
387 {
388 if (reg == 31) {
389 return new_tmp_a64_zero(s);
390 } else {
391 return cpu_X[reg];
392 }
393 }
394
395 /* register access for when 31 == SP */
396 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
397 {
398 return cpu_X[reg];
399 }
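/*
 * Usage sketch (illustrative): a decoder for "ADD <Xd|SP>, <Xn|SP>, #imm"
 * reads its operand with
 *
 *     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);   /* Rn == 31 means SP here */
 *
 * whereas an instruction whose register 31 means XZR (e.g. a logical
 * immediate source) would use cpu_reg(s, rn) instead.
 */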
400
401 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
402 * representing the register contents. This TCGv is an auto-freed
403 * temporary so it need not be explicitly freed, and may be modified.
404 */
405 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
406 {
407 TCGv_i64 v = new_tmp_a64(s);
408 if (reg != 31) {
409 if (sf) {
410 tcg_gen_mov_i64(v, cpu_X[reg]);
411 } else {
412 tcg_gen_ext32u_i64(v, cpu_X[reg]);
413 }
414 } else {
415 tcg_gen_movi_i64(v, 0);
416 }
417 return v;
418 }
419
420 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
421 {
422 TCGv_i64 v = new_tmp_a64(s);
423 if (sf) {
424 tcg_gen_mov_i64(v, cpu_X[reg]);
425 } else {
426 tcg_gen_ext32u_i64(v, cpu_X[reg]);
427 }
428 return v;
429 }
430
431 /* Return the offset into CPUARMState of a slice (from
432 * the least significant end) of FP register Qn (ie
433 * Dn, Sn, Hn or Bn).
434 * (Note that this is not the same mapping as for A32; see cpu.h)
435 */
436 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
437 {
438 return vec_reg_offset(s, regno, 0, size);
439 }
440
441 /* Offset of the high half of the 128 bit vector Qn */
442 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
443 {
444 return vec_reg_offset(s, regno, 1, MO_64);
445 }
446
447 /* Convenience accessors for reading and writing single and double
448 * FP registers. Writing clears the upper parts of the associated
449 * 128 bit vector register, as required by the architecture.
450 * Note that unlike the GP register accessors, the values returned
451 * by the read functions must be manually freed.
452 */
453 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
454 {
455 TCGv_i64 v = tcg_temp_new_i64();
456
457 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
458 return v;
459 }
460
461 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
462 {
463 TCGv_i32 v = tcg_temp_new_i32();
464
465 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
466 return v;
467 }
468
469 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
470 {
471 TCGv_i32 v = tcg_temp_new_i32();
472
473 tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
474 return v;
475 }
476
477 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
478 * If SVE is not enabled, then there are only 128 bits in the vector.
479 */
480 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
481 {
482 unsigned ofs = fp_reg_offset(s, rd, MO_64);
483 unsigned vsz = vec_full_reg_size(s);
484
485 if (!is_q) {
486 TCGv_i64 tcg_zero = tcg_const_i64(0);
487 tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
488 tcg_temp_free_i64(tcg_zero);
489 }
490 if (vsz > 16) {
491 tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
492 }
493 }
494
495 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
496 {
497 unsigned ofs = fp_reg_offset(s, reg, MO_64);
498
499 tcg_gen_st_i64(v, cpu_env, ofs);
500 clear_vec_high(s, false, reg);
501 }
502
503 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
504 {
505 TCGv_i64 tmp = tcg_temp_new_i64();
506
507 tcg_gen_extu_i32_i64(tmp, v);
508 write_fp_dreg(s, reg, tmp);
509 tcg_temp_free_i64(tmp);
510 }
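/*
 * Example of the zeroing behaviour (illustrative): after
 * write_fp_sreg(s, 0, v), S0 holds v while the rest of Q0 (and any
 * SVE bits beyond 128) read as zero, matching the architectural rule
 * that scalar FP writes clear the upper vector bits.
 */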
511
512 TCGv_ptr get_fpstatus_ptr(bool is_f16)
513 {
514 TCGv_ptr statusptr = tcg_temp_new_ptr();
515 int offset;
516
517 /* In A64 all instructions (both FP and Neon) use the FPCR; there
518 * is no equivalent of the A32 Neon "standard FPSCR value".
519 * However half-precision operations operate under a different
520 * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
521 */
522 if (is_f16) {
523 offset = offsetof(CPUARMState, vfp.fp_status_f16);
524 } else {
525 offset = offsetof(CPUARMState, vfp.fp_status);
526 }
527 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
528 return statusptr;
529 }
530
531 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
532 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
533 GVecGen2Fn *gvec_fn, int vece)
534 {
535 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
536 is_q ? 16 : 8, vec_full_reg_size(s));
537 }
538
539 /* Expand a 2-operand + immediate AdvSIMD vector operation using
540 * an expander function.
541 */
542 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
543 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
544 {
545 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
546 imm, is_q ? 16 : 8, vec_full_reg_size(s));
547 }
548
549 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
550 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
551 GVecGen3Fn *gvec_fn, int vece)
552 {
553 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
554 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
555 }
556
557 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
558 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
559 int rx, GVecGen4Fn *gvec_fn, int vece)
560 {
561 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
562 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
563 is_q ? 16 : 8, vec_full_reg_size(s));
564 }
565
566 /* Expand a 2-operand + immediate AdvSIMD vector operation using
567 * an op descriptor.
568 */
569 static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
570 int rn, int64_t imm, const GVecGen2i *gvec_op)
571 {
572 tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
573 is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
574 }
575
576 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
577 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
578 int rn, int rm, const GVecGen3 *gvec_op)
579 {
580 tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
581 vec_full_reg_offset(s, rm), is_q ? 16 : 8,
582 vec_full_reg_size(s), gvec_op);
583 }
584
585 /* Expand a 3-operand operation using an out-of-line helper. */
586 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
587 int rn, int rm, int data, gen_helper_gvec_3 *fn)
588 {
589 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
590 vec_full_reg_offset(s, rn),
591 vec_full_reg_offset(s, rm),
592 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
593 }
594
595 /* Expand a 3-operand + env pointer operation using
596 * an out-of-line helper.
597 */
598 static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
599 int rn, int rm, gen_helper_gvec_3_ptr *fn)
600 {
601 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
602 vec_full_reg_offset(s, rn),
603 vec_full_reg_offset(s, rm), cpu_env,
604 is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
605 }
606
607 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
608 * an out-of-line helper.
609 */
610 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
611 int rm, bool is_fp16, int data,
612 gen_helper_gvec_3_ptr *fn)
613 {
614 TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
615 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
616 vec_full_reg_offset(s, rn),
617 vec_full_reg_offset(s, rm), fpst,
618 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
619 tcg_temp_free_ptr(fpst);
620 }
621
622 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
623 * than the 32 bit equivalent.
624 */
625 static inline void gen_set_NZ64(TCGv_i64 result)
626 {
627 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
628 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
629 }
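/*
 * Worked example (illustrative): QEMU's convention is Z set iff
 * cpu_ZF == 0 and N taken from bit 31 of cpu_NF. For
 * result == 0xffffffff00000000 the code above yields
 *
 *     cpu_ZF = 0x00000000 | 0xffffffff = 0xffffffff   (Z clear)
 *     cpu_NF = 0xffffffff                             (N set)
 *
 * and only result == 0 makes both halves, and hence cpu_ZF, zero.
 */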
630
631 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
632 static inline void gen_logic_CC(int sf, TCGv_i64 result)
633 {
634 if (sf) {
635 gen_set_NZ64(result);
636 } else {
637 tcg_gen_extrl_i64_i32(cpu_ZF, result);
638 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
639 }
640 tcg_gen_movi_i32(cpu_CF, 0);
641 tcg_gen_movi_i32(cpu_VF, 0);
642 }
643
644 /* dest = T0 + T1; compute C, N, V and Z flags */
645 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
646 {
647 if (sf) {
648 TCGv_i64 result, flag, tmp;
649 result = tcg_temp_new_i64();
650 flag = tcg_temp_new_i64();
651 tmp = tcg_temp_new_i64();
652
653 tcg_gen_movi_i64(tmp, 0);
654 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
655
656 tcg_gen_extrl_i64_i32(cpu_CF, flag);
657
658 gen_set_NZ64(result);
659
660 tcg_gen_xor_i64(flag, result, t0);
661 tcg_gen_xor_i64(tmp, t0, t1);
662 tcg_gen_andc_i64(flag, flag, tmp);
663 tcg_temp_free_i64(tmp);
664 tcg_gen_extrh_i64_i32(cpu_VF, flag);
665
666 tcg_gen_mov_i64(dest, result);
667 tcg_temp_free_i64(result);
668 tcg_temp_free_i64(flag);
669 } else {
670 /* 32 bit arithmetic */
671 TCGv_i32 t0_32 = tcg_temp_new_i32();
672 TCGv_i32 t1_32 = tcg_temp_new_i32();
673 TCGv_i32 tmp = tcg_temp_new_i32();
674
675 tcg_gen_movi_i32(tmp, 0);
676 tcg_gen_extrl_i64_i32(t0_32, t0);
677 tcg_gen_extrl_i64_i32(t1_32, t1);
678 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
679 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
680 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
681 tcg_gen_xor_i32(tmp, t0_32, t1_32);
682 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
683 tcg_gen_extu_i32_i64(dest, cpu_NF);
684
685 tcg_temp_free_i32(tmp);
686 tcg_temp_free_i32(t0_32);
687 tcg_temp_free_i32(t1_32);
688 }
689 }
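/*
 * Flag sanity check (illustrative, 32-bit case): 0x7fffffff + 1 gives
 * cpu_NF = 0x80000000 (N set, Z clear), cpu_CF = 0 (no carry out),
 * and cpu_VF = (0x80000000 ^ 0x7fffffff) & ~(0x7fffffff ^ 1), whose
 * bit 31 is set: signed overflow, as expected.
 */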
690
691 /* dest = T0 - T1; compute C, N, V and Z flags */
692 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
693 {
694 if (sf) {
695 /* 64 bit arithmetic */
696 TCGv_i64 result, flag, tmp;
697
698 result = tcg_temp_new_i64();
699 flag = tcg_temp_new_i64();
700 tcg_gen_sub_i64(result, t0, t1);
701
702 gen_set_NZ64(result);
703
704 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
705 tcg_gen_extrl_i64_i32(cpu_CF, flag);
706
707 tcg_gen_xor_i64(flag, result, t0);
708 tmp = tcg_temp_new_i64();
709 tcg_gen_xor_i64(tmp, t0, t1);
710 tcg_gen_and_i64(flag, flag, tmp);
711 tcg_temp_free_i64(tmp);
712 tcg_gen_extrh_i64_i32(cpu_VF, flag);
713 tcg_gen_mov_i64(dest, result);
714 tcg_temp_free_i64(flag);
715 tcg_temp_free_i64(result);
716 } else {
717 /* 32 bit arithmetic */
718 TCGv_i32 t0_32 = tcg_temp_new_i32();
719 TCGv_i32 t1_32 = tcg_temp_new_i32();
720 TCGv_i32 tmp;
721
722 tcg_gen_extrl_i64_i32(t0_32, t0);
723 tcg_gen_extrl_i64_i32(t1_32, t1);
724 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
725 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
726 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
727 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
728 tmp = tcg_temp_new_i32();
729 tcg_gen_xor_i32(tmp, t0_32, t1_32);
730 tcg_temp_free_i32(t0_32);
731 tcg_temp_free_i32(t1_32);
732 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
733 tcg_temp_free_i32(tmp);
734 tcg_gen_extu_i32_i64(dest, cpu_NF);
735 }
736 }
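/*
 * Note the AArch64 borrow convention (illustrative example): C is set
 * when there is *no* borrow, hence the GEU setcond above; 5 - 7 in
 * the 32-bit case leaves cpu_CF = 0 and cpu_NF = 0xfffffffe.
 */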
737
738 /* dest = T0 + T1 + CF; do not compute flags. */
739 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
740 {
741 TCGv_i64 flag = tcg_temp_new_i64();
742 tcg_gen_extu_i32_i64(flag, cpu_CF);
743 tcg_gen_add_i64(dest, t0, t1);
744 tcg_gen_add_i64(dest, dest, flag);
745 tcg_temp_free_i64(flag);
746
747 if (!sf) {
748 tcg_gen_ext32u_i64(dest, dest);
749 }
750 }
751
752 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
753 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
754 {
755 if (sf) {
756 TCGv_i64 result, cf_64, vf_64, tmp;
757 result = tcg_temp_new_i64();
758 cf_64 = tcg_temp_new_i64();
759 vf_64 = tcg_temp_new_i64();
760 tmp = tcg_const_i64(0);
761
762 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
763 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
764 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
765 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
766 gen_set_NZ64(result);
767
768 tcg_gen_xor_i64(vf_64, result, t0);
769 tcg_gen_xor_i64(tmp, t0, t1);
770 tcg_gen_andc_i64(vf_64, vf_64, tmp);
771 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
772
773 tcg_gen_mov_i64(dest, result);
774
775 tcg_temp_free_i64(tmp);
776 tcg_temp_free_i64(vf_64);
777 tcg_temp_free_i64(cf_64);
778 tcg_temp_free_i64(result);
779 } else {
780 TCGv_i32 t0_32, t1_32, tmp;
781 t0_32 = tcg_temp_new_i32();
782 t1_32 = tcg_temp_new_i32();
783 tmp = tcg_const_i32(0);
784
785 tcg_gen_extrl_i64_i32(t0_32, t0);
786 tcg_gen_extrl_i64_i32(t1_32, t1);
787 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
788 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
789
790 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
791 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
792 tcg_gen_xor_i32(tmp, t0_32, t1_32);
793 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
794 tcg_gen_extu_i32_i64(dest, cpu_NF);
795
796 tcg_temp_free_i32(tmp);
797 tcg_temp_free_i32(t1_32);
798 tcg_temp_free_i32(t0_32);
799 }
800 }
801
802 /*
803 * Load/Store generators
804 */
805
806 /*
807 * Store from GPR register to memory.
808 */
809 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
810 TCGv_i64 tcg_addr, int size, int memidx,
811 bool iss_valid,
812 unsigned int iss_srt,
813 bool iss_sf, bool iss_ar)
814 {
815 g_assert(size <= 3);
816 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
817
818 if (iss_valid) {
819 uint32_t syn;
820
821 syn = syn_data_abort_with_iss(0,
822 size,
823 false,
824 iss_srt,
825 iss_sf,
826 iss_ar,
827 0, 0, 0, 0, 0, false);
828 disas_set_insn_syndrome(s, syn);
829 }
830 }
831
832 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
833 TCGv_i64 tcg_addr, int size,
834 bool iss_valid,
835 unsigned int iss_srt,
836 bool iss_sf, bool iss_ar)
837 {
838 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
839 iss_valid, iss_srt, iss_sf, iss_ar);
840 }
841
842 /*
843 * Load from memory to GPR register
844 */
845 static void do_gpr_ld_memidx(DisasContext *s,
846 TCGv_i64 dest, TCGv_i64 tcg_addr,
847 int size, bool is_signed,
848 bool extend, int memidx,
849 bool iss_valid, unsigned int iss_srt,
850 bool iss_sf, bool iss_ar)
851 {
852 MemOp memop = s->be_data + size;
853
854 g_assert(size <= 3);
855
856 if (is_signed) {
857 memop += MO_SIGN;
858 }
859
860 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
861
862 if (extend && is_signed) {
863 g_assert(size < 3);
864 tcg_gen_ext32u_i64(dest, dest);
865 }
866
867 if (iss_valid) {
868 uint32_t syn;
869
870 syn = syn_data_abort_with_iss(0,
871 size,
872 is_signed,
873 iss_srt,
874 iss_sf,
875 iss_ar,
876 0, 0, 0, 0, 0, false);
877 disas_set_insn_syndrome(s, syn);
878 }
879 }
880
881 static void do_gpr_ld(DisasContext *s,
882 TCGv_i64 dest, TCGv_i64 tcg_addr,
883 int size, bool is_signed, bool extend,
884 bool iss_valid, unsigned int iss_srt,
885 bool iss_sf, bool iss_ar)
886 {
887 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
888 get_mem_index(s),
889 iss_valid, iss_srt, iss_sf, iss_ar);
890 }
891
892 /*
893 * Store from FP register to memory
894 */
895 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
896 {
897 /* This writes the bottom N bits of a 128 bit wide vector to memory */
898 TCGv_i64 tmp = tcg_temp_new_i64();
899 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
900 if (size < 4) {
901 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
902 s->be_data + size);
903 } else {
904 bool be = s->be_data == MO_BE;
905 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
906
907 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
908 tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
909 s->be_data | MO_Q);
910 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
911 tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
912 s->be_data | MO_Q);
913 tcg_temp_free_i64(tcg_hiaddr);
914 }
915
916 tcg_temp_free_i64(tmp);
917 }
918
919 /*
920 * Load from memory to FP register
921 */
922 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
923 {
924 /* This always zero-extends and writes to a full 128 bit wide vector */
925 TCGv_i64 tmplo = tcg_temp_new_i64();
926 TCGv_i64 tmphi;
927
928 if (size < 4) {
929 MemOp memop = s->be_data + size;
930 tmphi = tcg_const_i64(0);
931 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
932 } else {
933 bool be = s->be_data == MO_BE;
934 TCGv_i64 tcg_hiaddr;
935
936 tmphi = tcg_temp_new_i64();
937 tcg_hiaddr = tcg_temp_new_i64();
938
939 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
940 tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
941 s->be_data | MO_Q);
942 tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
943 s->be_data | MO_Q);
944 tcg_temp_free_i64(tcg_hiaddr);
945 }
946
947 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
948 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
949
950 tcg_temp_free_i64(tmplo);
951 tcg_temp_free_i64(tmphi);
952
953 clear_vec_high(s, true, destidx);
954 }
955
956 /*
957 * Vector load/store helpers.
958 *
959 * The principal difference between this and a FP load is that we don't
960 * zero extend as we are filling a partial chunk of the vector register.
961 * These functions don't support 128 bit loads/stores, which would be
962 * normal load/store operations.
963 *
964 * The _i32 versions are useful when operating on 32 bit quantities
965 * (eg for floating point single or using Neon helper functions).
966 */
967
968 /* Get value of an element within a vector register */
969 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
970 int element, MemOp memop)
971 {
972 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
973 switch (memop) {
974 case MO_8:
975 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
976 break;
977 case MO_16:
978 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
979 break;
980 case MO_32:
981 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
982 break;
983 case MO_8|MO_SIGN:
984 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
985 break;
986 case MO_16|MO_SIGN:
987 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
988 break;
989 case MO_32|MO_SIGN:
990 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
991 break;
992 case MO_64:
993 case MO_64|MO_SIGN:
994 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
995 break;
996 default:
997 g_assert_not_reached();
998 }
999 }
1000
1001 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1002 int element, MemOp memop)
1003 {
1004 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1005 switch (memop) {
1006 case MO_8:
1007 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1008 break;
1009 case MO_16:
1010 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1011 break;
1012 case MO_8|MO_SIGN:
1013 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1014 break;
1015 case MO_16|MO_SIGN:
1016 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1017 break;
1018 case MO_32:
1019 case MO_32|MO_SIGN:
1020 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1021 break;
1022 default:
1023 g_assert_not_reached();
1024 }
1025 }
1026
1027 /* Set value of an element within a vector register */
1028 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1029 int element, MemOp memop)
1030 {
1031 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1032 switch (memop) {
1033 case MO_8:
1034 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1035 break;
1036 case MO_16:
1037 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1038 break;
1039 case MO_32:
1040 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1041 break;
1042 case MO_64:
1043 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1044 break;
1045 default:
1046 g_assert_not_reached();
1047 }
1048 }
1049
1050 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1051 int destidx, int element, MemOp memop)
1052 {
1053 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1054 switch (memop) {
1055 case MO_8:
1056 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1057 break;
1058 case MO_16:
1059 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1060 break;
1061 case MO_32:
1062 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1063 break;
1064 default:
1065 g_assert_not_reached();
1066 }
1067 }
1068
1069 /* Store from vector register to memory */
1070 static void do_vec_st(DisasContext *s, int srcidx, int element,
1071 TCGv_i64 tcg_addr, int size, MemOp endian)
1072 {
1073 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1074
1075 read_vec_element(s, tcg_tmp, srcidx, element, size);
1076 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1077
1078 tcg_temp_free_i64(tcg_tmp);
1079 }
1080
1081 /* Load from memory to vector register */
1082 static void do_vec_ld(DisasContext *s, int destidx, int element,
1083 TCGv_i64 tcg_addr, int size, MemOp endian)
1084 {
1085 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1086
1087 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1088 write_vec_element(s, tcg_tmp, destidx, element, size);
1089
1090 tcg_temp_free_i64(tcg_tmp);
1091 }
1092
1093 /* Check that FP/Neon access is enabled. If it is, return
1094 * true. If not, emit code to generate an appropriate exception,
1095 * and return false; the caller should not emit any code for
1096 * the instruction. Note that this check must happen after all
1097 * unallocated-encoding checks (otherwise the syndrome information
1098 * for the resulting exception will be incorrect).
1099 */
1100 static inline bool fp_access_check(DisasContext *s)
1101 {
1102 assert(!s->fp_access_checked);
1103 s->fp_access_checked = true;
1104
1105 if (!s->fp_excp_el) {
1106 return true;
1107 }
1108
1109 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1110 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1111 return false;
1112 }
1113
1114 /* Check that SVE access is enabled. If it is, return true.
1115 * If not, emit code to generate an appropriate exception and return false.
1116 */
1117 bool sve_access_check(DisasContext *s)
1118 {
1119 if (s->sve_excp_el) {
1120 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
1121 s->sve_excp_el);
1122 return false;
1123 }
1124 return fp_access_check(s);
1125 }
1126
1127 /*
1128 * This utility function is for doing register extension with an
1129 * optional shift. You will likely want to pass a temporary for the
1130 * destination register. See DecodeRegExtend() in the ARM ARM.
1131 */
1132 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1133 int option, unsigned int shift)
1134 {
1135 int extsize = extract32(option, 0, 2);
1136 bool is_signed = extract32(option, 2, 1);
1137
1138 if (is_signed) {
1139 switch (extsize) {
1140 case 0:
1141 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1142 break;
1143 case 1:
1144 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1145 break;
1146 case 2:
1147 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1148 break;
1149 case 3:
1150 tcg_gen_mov_i64(tcg_out, tcg_in);
1151 break;
1152 }
1153 } else {
1154 switch (extsize) {
1155 case 0:
1156 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1157 break;
1158 case 1:
1159 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1160 break;
1161 case 2:
1162 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1163 break;
1164 case 3:
1165 tcg_gen_mov_i64(tcg_out, tcg_in);
1166 break;
1167 }
1168 }
1169
1170 if (shift) {
1171 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1172 }
1173 }
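/*
 * Example (illustrative): for "ADD x0, x1, w2, UXTW #2" the decoder
 * would call
 *
 *     ext_and_shift_reg(tcg_out, tcg_in, 2, 2);
 *
 * i.e. option == 0b010 (unsigned, 32-bit source), producing
 * tcg_out = (uint64_t)(uint32_t)tcg_in << 2.
 */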
1174
1175 static inline void gen_check_sp_alignment(DisasContext *s)
1176 {
1177 /* The AArch64 architecture mandates that (if enabled via PSTATE
1178 * or SCTLR bits) there is a check that SP is 16-aligned on every
1179 * SP-relative load or store (with an exception generated if it is not).
1180 * In line with general QEMU practice regarding misaligned accesses,
1181 * we omit these checks for the sake of guest program performance.
1182 * This function is provided as a hook so we can more easily add these
1183 * checks in future (possibly as a "favour catching guest program bugs
1184 * over speed" user selectable option).
1185 */
1186 }
1187
1188 /*
1189 * This provides a simple table-based lookup decoder. It is
1190 * intended to be used when the relevant bits for decode are too
1191 * awkwardly placed and switch/if based logic would be confusing and
1192 * deeply nested. Since it's a linear search through the table, tables
1193 * should be kept small.
1194 *
1195 * It returns the first handler where insn & mask == pattern, or
1196 * NULL if there is no match.
1197 * The table is terminated by an empty mask (i.e. 0)
1198 */
1199 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1200 uint32_t insn)
1201 {
1202 const AArch64DecodeTable *tptr = table;
1203
1204 while (tptr->mask) {
1205 if ((insn & tptr->mask) == tptr->pattern) {
1206 return tptr->disas_fn;
1207 }
1208 tptr++;
1209 }
1210 return NULL;
1211 }
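/*
 * Sketch of a table and its lookup (entries abridged and illustrative;
 * the pattern/mask pair and handler name are assumptions here):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *         { 0x00000000, 0x00000000, NULL }  /* terminator: mask == 0 */
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     }
 */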
1212
1213 /*
1214 * The instruction disassembly implemented here matches
1215 * the instruction encoding classifications in chapter C4
1216 * of the ARM Architecture Reference Manual (DDI0487B_a);
1217 * classification names and decode diagrams here should generally
1218 * match up with those in the manual.
1219 */
1220
1221 /* Unconditional branch (immediate)
1222 * 31 30 26 25 0
1223 * +----+-----------+-------------------------------------+
1224 * | op | 0 0 1 0 1 | imm26 |
1225 * +----+-----------+-------------------------------------+
1226 */
1227 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1228 {
1229 uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1230
1231 if (insn & (1U << 31)) {
1232 /* BL Branch with link */
1233 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1234 }
1235
1236 /* B Branch / BL Branch with link */
1237 reset_btype(s);
1238 gen_goto_tb(s, 0, addr);
1239 }
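/*
 * Encoding example (illustrative): insn 0x14000002 is "B .+8" (op == 0,
 * imm26 == 2, so addr = pc_curr + 2 * 4); with bit 31 set, 0x94000002
 * is "BL .+8" and X30 additionally receives pc_next.
 */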
1240
1241 /* Compare and branch (immediate)
1242 * 31 30 25 24 23 5 4 0
1243 * +----+-------------+----+---------------------+--------+
1244 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1245 * +----+-------------+----+---------------------+--------+
1246 */
1247 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1248 {
1249 unsigned int sf, op, rt;
1250 uint64_t addr;
1251 TCGLabel *label_match;
1252 TCGv_i64 tcg_cmp;
1253
1254 sf = extract32(insn, 31, 1);
1255 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1256 rt = extract32(insn, 0, 5);
1257 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1258
1259 tcg_cmp = read_cpu_reg(s, rt, sf);
1260 label_match = gen_new_label();
1261
1262 reset_btype(s);
1263 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1264 tcg_cmp, 0, label_match);
1265
1266 gen_goto_tb(s, 0, s->base.pc_next);
1267 gen_set_label(label_match);
1268 gen_goto_tb(s, 1, addr);
1269 }
1270
1271 /* Test and branch (immediate)
1272 * 31 30 25 24 23 19 18 5 4 0
1273 * +----+-------------+----+-------+-------------+------+
1274 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1275 * +----+-------------+----+-------+-------------+------+
1276 */
1277 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1278 {
1279 unsigned int bit_pos, op, rt;
1280 uint64_t addr;
1281 TCGLabel *label_match;
1282 TCGv_i64 tcg_cmp;
1283
1284 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1285 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1286 addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1287 rt = extract32(insn, 0, 5);
1288
1289 tcg_cmp = tcg_temp_new_i64();
1290 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1291 label_match = gen_new_label();
1292
1293 reset_btype(s);
1294 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1295 tcg_cmp, 0, label_match);
1296 tcg_temp_free_i64(tcg_cmp);
1297 gen_goto_tb(s, 0, s->base.pc_next);
1298 gen_set_label(label_match);
1299 gen_goto_tb(s, 1, addr);
1300 }
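/*
 * Example (illustrative): "TBNZ x3, #37, <label>" encodes b5 == 1 and
 * b40 == 0b00101, so bit_pos == (1 << 5) | 5 == 37 and the brcond
 * tests X3 & (1ULL << 37).
 */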
1301
1302 /* Conditional branch (immediate)
1303 * 31 25 24 23 5 4 3 0
1304 * +---------------+----+---------------------+----+------+
1305 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1306 * +---------------+----+---------------------+----+------+
1307 */
1308 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1309 {
1310 unsigned int cond;
1311 uint64_t addr;
1312
1313 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1314 unallocated_encoding(s);
1315 return;
1316 }
1317 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1318 cond = extract32(insn, 0, 4);
1319
1320 reset_btype(s);
1321 if (cond < 0x0e) {
1322 /* genuinely conditional branches */
1323 TCGLabel *label_match = gen_new_label();
1324 arm_gen_test_cc(cond, label_match);
1325 gen_goto_tb(s, 0, s->base.pc_next);
1326 gen_set_label(label_match);
1327 gen_goto_tb(s, 1, addr);
1328 } else {
1329 /* 0xe and 0xf are both "always" conditions */
1330 gen_goto_tb(s, 0, addr);
1331 }
1332 }
1333
1334 /* HINT instruction group, including various allocated HINTs */
1335 static void handle_hint(DisasContext *s, uint32_t insn,
1336 unsigned int op1, unsigned int op2, unsigned int crm)
1337 {
1338 unsigned int selector = crm << 3 | op2;
1339
1340 if (op1 != 3) {
1341 unallocated_encoding(s);
1342 return;
1343 }
1344
1345 switch (selector) {
1346 case 0b00000: /* NOP */
1347 break;
1348 case 0b00011: /* WFI */
1349 s->base.is_jmp = DISAS_WFI;
1350 break;
1351 case 0b00001: /* YIELD */
1352 /* When running in MTTCG we don't generate jumps to the yield and
1353 * WFE helpers as it won't affect the scheduling of other vCPUs.
1354 * If we wanted to more completely model WFE/SEV so we don't busy
1355 * spin unnecessarily we would need to do something more involved.
1356 */
1357 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1358 s->base.is_jmp = DISAS_YIELD;
1359 }
1360 break;
1361 case 0b00010: /* WFE */
1362 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1363 s->base.is_jmp = DISAS_WFE;
1364 }
1365 break;
1366 case 0b00100: /* SEV */
1367 case 0b00101: /* SEVL */
1368 /* we treat all as NOP at least for now */
1369 break;
1370 case 0b00111: /* XPACLRI */
1371 if (s->pauth_active) {
1372 gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1373 }
1374 break;
1375 case 0b01000: /* PACIA1716 */
1376 if (s->pauth_active) {
1377 gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1378 }
1379 break;
1380 case 0b01010: /* PACIB1716 */
1381 if (s->pauth_active) {
1382 gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1383 }
1384 break;
1385 case 0b01100: /* AUTIA1716 */
1386 if (s->pauth_active) {
1387 gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1388 }
1389 break;
1390 case 0b01110: /* AUTIB1716 */
1391 if (s->pauth_active) {
1392 gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1393 }
1394 break;
1395 case 0b11000: /* PACIAZ */
1396 if (s->pauth_active) {
1397 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1398 new_tmp_a64_zero(s));
1399 }
1400 break;
1401 case 0b11001: /* PACIASP */
1402 if (s->pauth_active) {
1403 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1404 }
1405 break;
1406 case 0b11010: /* PACIBZ */
1407 if (s->pauth_active) {
1408 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1409 new_tmp_a64_zero(s));
1410 }
1411 break;
1412 case 0b11011: /* PACIBSP */
1413 if (s->pauth_active) {
1414 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1415 }
1416 break;
1417 case 0b11100: /* AUTIAZ */
1418 if (s->pauth_active) {
1419 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1420 new_tmp_a64_zero(s));
1421 }
1422 break;
1423 case 0b11101: /* AUTIASP */
1424 if (s->pauth_active) {
1425 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1426 }
1427 break;
1428 case 0b11110: /* AUTIBZ */
1429 if (s->pauth_active) {
1430 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1431 new_tmp_a64_zero(s));
1432 }
1433 break;
1434 case 0b11111: /* AUTIBSP */
1435 if (s->pauth_active) {
1436 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1437 }
1438 break;
1439 default:
1440 /* default specified as NOP equivalent */
1441 break;
1442 }
1443 }
1444
1445 static void gen_clrex(DisasContext *s, uint32_t insn)
1446 {
1447 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1448 }
1449
1450 /* CLREX, DSB, DMB, ISB */
1451 static void handle_sync(DisasContext *s, uint32_t insn,
1452 unsigned int op1, unsigned int op2, unsigned int crm)
1453 {
1454 TCGBar bar;
1455
1456 if (op1 != 3) {
1457 unallocated_encoding(s);
1458 return;
1459 }
1460
1461 switch (op2) {
1462 case 2: /* CLREX */
1463 gen_clrex(s, insn);
1464 return;
1465 case 4: /* DSB */
1466 case 5: /* DMB */
1467 switch (crm & 3) {
1468 case 1: /* MBReqTypes_Reads */
1469 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1470 break;
1471 case 2: /* MBReqTypes_Writes */
1472 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1473 break;
1474 default: /* MBReqTypes_All */
1475 bar = TCG_BAR_SC | TCG_MO_ALL;
1476 break;
1477 }
1478 tcg_gen_mb(bar);
1479 return;
1480 case 6: /* ISB */
1481 /* We need to break the TB after this insn to execute
1482 * self-modifying code correctly and also to take
1483 * any pending interrupts immediately.
1484 */
1485 reset_btype(s);
1486 gen_goto_tb(s, 0, s->base.pc_next);
1487 return;
1488
1489 case 7: /* SB */
1490 if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1491 goto do_unallocated;
1492 }
1493 /*
1494 * TODO: There is no speculation barrier opcode for TCG;
1495 * MB and end the TB instead.
1496 */
1497 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1498 gen_goto_tb(s, 0, s->base.pc_next);
1499 return;
1500
1501 default:
1502 do_unallocated:
1503 unallocated_encoding(s);
1504 return;
1505 }
1506 }
1507
1508 static void gen_xaflag(void)
1509 {
1510 TCGv_i32 z = tcg_temp_new_i32();
1511
1512 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1513
1514 /*
1515 * (!C & !Z) << 31
1516 * (!(C | Z)) << 31
1517 * ~((C | Z) << 31)
1518 * ~-(C | Z)
1519 * (C | Z) - 1
1520 */
1521 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1522 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1523
1524 /* !(Z & C) */
1525 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1526 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1527
1528 /* (!C & Z) << 31 -> -(Z & ~C) */
1529 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1530 tcg_gen_neg_i32(cpu_VF, cpu_VF);
1531
1532 /* C | Z */
1533 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1534
1535 tcg_temp_free_i32(z);
1536 }
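/*
 * Spot check (illustrative): for C == 0 and Z == 0 (i.e. cpu_ZF != 0),
 * z == 0 above, so cpu_NF = (0 | 0) - 1 = 0xffffffff and N reads as
 * set, matching the required (!C & !Z) << 31.
 */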
1537
1538 static void gen_axflag(void)
1539 {
1540 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
1541 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
1542
1543 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1544 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1545
1546 tcg_gen_movi_i32(cpu_NF, 0);
1547 tcg_gen_movi_i32(cpu_VF, 0);
1548 }
1549
1550 /* MSR (immediate) - move immediate to processor state field */
1551 static void handle_msr_i(DisasContext *s, uint32_t insn,
1552 unsigned int op1, unsigned int op2, unsigned int crm)
1553 {
1554 TCGv_i32 t1;
1555 int op = op1 << 3 | op2;
1556
1557 /* End the TB by default, chaining is ok. */
1558 s->base.is_jmp = DISAS_TOO_MANY;
1559
1560 switch (op) {
1561 case 0x00: /* CFINV */
1562 if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1563 goto do_unallocated;
1564 }
1565 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1566 s->base.is_jmp = DISAS_NEXT;
1567 break;
1568
1569 case 0x01: /* XAFlag */
1570 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1571 goto do_unallocated;
1572 }
1573 gen_xaflag();
1574 s->base.is_jmp = DISAS_NEXT;
1575 break;
1576
1577 case 0x02: /* AXFlag */
1578 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1579 goto do_unallocated;
1580 }
1581 gen_axflag();
1582 s->base.is_jmp = DISAS_NEXT;
1583 break;
1584
1585 case 0x05: /* SPSel */
1586 if (s->current_el == 0) {
1587 goto do_unallocated;
1588 }
1589 t1 = tcg_const_i32(crm & PSTATE_SP);
1590 gen_helper_msr_i_spsel(cpu_env, t1);
1591 tcg_temp_free_i32(t1);
1592 break;
1593
1594 case 0x1e: /* DAIFSet */
1595 t1 = tcg_const_i32(crm);
1596 gen_helper_msr_i_daifset(cpu_env, t1);
1597 tcg_temp_free_i32(t1);
1598 break;
1599
1600 case 0x1f: /* DAIFClear */
1601 t1 = tcg_const_i32(crm);
1602 gen_helper_msr_i_daifclear(cpu_env, t1);
1603 tcg_temp_free_i32(t1);
1604 /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
1605 s->base.is_jmp = DISAS_UPDATE;
1606 break;
1607
1608 default:
1609 do_unallocated:
1610 unallocated_encoding(s);
1611 return;
1612 }
1613 }
1614
1615 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1616 {
1617 TCGv_i32 tmp = tcg_temp_new_i32();
1618 TCGv_i32 nzcv = tcg_temp_new_i32();
1619
1620 /* build bit 31, N */
1621 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1622 /* build bit 30, Z */
1623 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1624 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1625 /* build bit 29, C */
1626 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1627 /* build bit 28, V */
1628 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1629 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1630 /* generate result */
1631 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1632
1633 tcg_temp_free_i32(nzcv);
1634 tcg_temp_free_i32(tmp);
1635 }
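/*
 * Layout reminder (illustrative): for N=1, Z=0, C=1, V=0 the value
 * written to R[t] is 0xa0000000, i.e. NZCV packed into bits [31:28]
 * with all other bits zero.
 */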
1636
1637 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1638 {
1639 TCGv_i32 nzcv = tcg_temp_new_i32();
1640
1641 /* take NZCV from R[t] */
1642 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1643
1644 /* bit 31, N */
1645 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1646 /* bit 30, Z */
1647 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1648 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1649 /* bit 29, C */
1650 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1651 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1652 /* bit 28, V */
1653 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1654 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1655 tcg_temp_free_i32(nzcv);
1656 }
1657
1658 /* MRS - move from system register
1659 * MSR (register) - move to system register
1660 * SYS
1661 * SYSL
1662 * These are all essentially the same insn in 'read' and 'write'
1663 * versions, with varying op0 fields.
1664 */
1665 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1666 unsigned int op0, unsigned int op1, unsigned int op2,
1667 unsigned int crn, unsigned int crm, unsigned int rt)
1668 {
1669 const ARMCPRegInfo *ri;
1670 TCGv_i64 tcg_rt;
1671
1672 ri = get_arm_cp_reginfo(s->cp_regs,
1673 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1674 crn, crm, op0, op1, op2));
1675
1676 if (!ri) {
1677 /* Unknown register; this might be a guest error or a QEMU
1678 * unimplemented feature.
1679 */
1680 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1681 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1682 isread ? "read" : "write", op0, op1, crn, crm, op2);
1683 unallocated_encoding(s);
1684 return;
1685 }
1686
1687 /* Check access permissions */
1688 if (!cp_access_ok(s->current_el, ri, isread)) {
1689 unallocated_encoding(s);
1690 return;
1691 }
1692
1693 if (ri->accessfn) {
1694 /* Emit code to perform further access permissions checks at
1695 * runtime; this may result in an exception.
1696 */
1697 TCGv_ptr tmpptr;
1698 TCGv_i32 tcg_syn, tcg_isread;
1699 uint32_t syndrome;
1700
1701 gen_a64_set_pc_im(s->pc_curr);
1702 tmpptr = tcg_const_ptr(ri);
1703 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1704 tcg_syn = tcg_const_i32(syndrome);
1705 tcg_isread = tcg_const_i32(isread);
1706 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1707 tcg_temp_free_ptr(tmpptr);
1708 tcg_temp_free_i32(tcg_syn);
1709 tcg_temp_free_i32(tcg_isread);
1710 }
1711
1712 /* Handle special cases first */
1713 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1714 case ARM_CP_NOP:
1715 return;
1716 case ARM_CP_NZCV:
1717 tcg_rt = cpu_reg(s, rt);
1718 if (isread) {
1719 gen_get_nzcv(tcg_rt);
1720 } else {
1721 gen_set_nzcv(tcg_rt);
1722 }
1723 return;
1724 case ARM_CP_CURRENTEL:
1725 /* Reads as current EL value from pstate, which is
1726 * guaranteed to be constant by the tb flags.
1727 */
1728 tcg_rt = cpu_reg(s, rt);
1729 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1730 return;
1731 case ARM_CP_DC_ZVA:
1732 /* Writes clear the aligned block of memory which rt points into. */
1733 tcg_rt = cpu_reg(s, rt);
1734 gen_helper_dc_zva(cpu_env, tcg_rt);
1735 return;
1736 default:
1737 break;
1738 }
1739 if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1740 return;
1741 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1742 return;
1743 }
1744
1745 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1746 gen_io_start();
1747 }
1748
1749 tcg_rt = cpu_reg(s, rt);
1750
1751 if (isread) {
1752 if (ri->type & ARM_CP_CONST) {
1753 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1754 } else if (ri->readfn) {
1755 TCGv_ptr tmpptr;
1756 tmpptr = tcg_const_ptr(ri);
1757 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1758 tcg_temp_free_ptr(tmpptr);
1759 } else {
1760 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1761 }
1762 } else {
1763 if (ri->type & ARM_CP_CONST) {
1764 /* If not forbidden by access permissions, treat as WI */
1765 return;
1766 } else if (ri->writefn) {
1767 TCGv_ptr tmpptr;
1768 tmpptr = tcg_const_ptr(ri);
1769 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1770 tcg_temp_free_ptr(tmpptr);
1771 } else {
1772 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1773 }
1774 }
1775
1776 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1777 /* I/O operations must end the TB here (whether read or write) */
1778 s->base.is_jmp = DISAS_UPDATE;
1779 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1780 /* We default to ending the TB on a coprocessor register write,
1781 * but allow this to be suppressed by the register definition
1782 * (usually only necessary to work around guest bugs).
1783 */
1784 s->base.is_jmp = DISAS_UPDATE;
1785 }
1786 }
1787
1788 /* System
1789 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1790 * +---------------------+---+-----+-----+-------+-------+-----+------+
1791 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1792 * +---------------------+---+-----+-----+-------+-------+-----+------+
1793 */
1794 static void disas_system(DisasContext *s, uint32_t insn)
1795 {
1796 unsigned int l, op0, op1, crn, crm, op2, rt;
1797 l = extract32(insn, 21, 1);
1798 op0 = extract32(insn, 19, 2);
1799 op1 = extract32(insn, 16, 3);
1800 crn = extract32(insn, 12, 4);
1801 crm = extract32(insn, 8, 4);
1802 op2 = extract32(insn, 5, 3);
1803 rt = extract32(insn, 0, 5);
1804
1805 if (op0 == 0) {
1806 if (l || rt != 31) {
1807 unallocated_encoding(s);
1808 return;
1809 }
1810 switch (crn) {
1811 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1812 handle_hint(s, insn, op1, op2, crm);
1813 break;
1814 case 3: /* CLREX, DSB, DMB, ISB */
1815 handle_sync(s, insn, op1, op2, crm);
1816 break;
1817 case 4: /* MSR (immediate) */
1818 handle_msr_i(s, insn, op1, op2, crm);
1819 break;
1820 default:
1821 unallocated_encoding(s);
1822 break;
1823 }
1824 return;
1825 }
1826 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1827 }
1828
1829 /* Exception generation
1830 *
1831 * 31 24 23 21 20 5 4 2 1 0
1832 * +-----------------+-----+------------------------+-----+----+
1833 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1834 * +-----------------------+------------------------+----------+
1835 */
1836 static void disas_exc(DisasContext *s, uint32_t insn)
1837 {
1838 int opc = extract32(insn, 21, 3);
1839 int op2_ll = extract32(insn, 0, 5);
1840 int imm16 = extract32(insn, 5, 16);
1841 TCGv_i32 tmp;
1842
1843 switch (opc) {
1844 case 0:
1845 /* For SVC, HVC and SMC we advance the single-step state
1846 * machine before taking the exception. This is architecturally
1847 * mandated, to ensure that single-stepping a system call
1848 * instruction works properly.
1849 */
1850 switch (op2_ll) {
1851 case 1: /* SVC */
1852 gen_ss_advance(s);
1853 gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
1854 syn_aa64_svc(imm16), default_exception_el(s));
1855 break;
1856 case 2: /* HVC */
1857 if (s->current_el == 0) {
1858 unallocated_encoding(s);
1859 break;
1860 }
1861 /* The pre HVC helper handles cases when HVC gets trapped
1862 * as an undefined insn by runtime configuration.
1863 */
1864 gen_a64_set_pc_im(s->pc_curr);
1865 gen_helper_pre_hvc(cpu_env);
1866 gen_ss_advance(s);
1867 gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
1868 syn_aa64_hvc(imm16), 2);
1869 break;
1870 case 3: /* SMC */
1871 if (s->current_el == 0) {
1872 unallocated_encoding(s);
1873 break;
1874 }
1875 gen_a64_set_pc_im(s->pc_curr);
1876 tmp = tcg_const_i32(syn_aa64_smc(imm16));
1877 gen_helper_pre_smc(cpu_env, tmp);
1878 tcg_temp_free_i32(tmp);
1879 gen_ss_advance(s);
1880 gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
1881 syn_aa64_smc(imm16), 3);
1882 break;
1883 default:
1884 unallocated_encoding(s);
1885 break;
1886 }
1887 break;
1888 case 1:
1889 if (op2_ll != 0) {
1890 unallocated_encoding(s);
1891 break;
1892 }
1893 /* BRK */
1894 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
1895 break;
1896 case 2:
1897 if (op2_ll != 0) {
1898 unallocated_encoding(s);
1899 break;
1900 }
1901 /* HLT. This has two purposes.
1902 * Architecturally, it is an external halting debug instruction.
1903 * Since QEMU doesn't implement external debug, we treat this
1904 * as it is required to behave when halting debug is disabled: it UNDEFs.
1905 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1906 */
1907 if (semihosting_enabled() && imm16 == 0xf000) {
1908 #ifndef CONFIG_USER_ONLY
1909 /* In system mode, don't allow userspace access to semihosting,
1910 * to provide some semblance of security (and for consistency
1911 * with our 32-bit semihosting).
1912 */
1913 if (s->current_el == 0) {
1914 unsupported_encoding(s, insn);
1915 break;
1916 }
1917 #endif
1918 gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1919 } else {
1920 unsupported_encoding(s, insn);
1921 }
1922 break;
1923 case 5:
1924 if (op2_ll < 1 || op2_ll > 3) {
1925 unallocated_encoding(s);
1926 break;
1927 }
1928 /* DCPS1, DCPS2, DCPS3 */
1929 unsupported_encoding(s, insn);
1930 break;
1931 default:
1932 unallocated_encoding(s);
1933 break;
1934 }
1935 }
1936
1937 /* Unconditional branch (register)
1938 * 31 25 24 21 20 16 15 10 9 5 4 0
1939 * +---------------+-------+-------+-------+------+-------+
1940 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1941 * +---------------+-------+-------+-------+------+-------+
1942 */
1943 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1944 {
1945 unsigned int opc, op2, op3, rn, op4;
1946 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */
1947 TCGv_i64 dst;
1948 TCGv_i64 modifier;
1949
1950 opc = extract32(insn, 21, 4);
1951 op2 = extract32(insn, 16, 5);
1952 op3 = extract32(insn, 10, 6);
1953 rn = extract32(insn, 5, 5);
1954 op4 = extract32(insn, 0, 5);
1955
1956 if (op2 != 0x1f) {
1957 goto do_unallocated;
1958 }
1959
1960 switch (opc) {
1961 case 0: /* BR */
1962 case 1: /* BLR */
1963 case 2: /* RET */
1964 btype_mod = opc;
1965 switch (op3) {
1966 case 0:
1967 /* BR, BLR, RET */
1968 if (op4 != 0) {
1969 goto do_unallocated;
1970 }
1971 dst = cpu_reg(s, rn);
1972 break;
1973
1974 case 2:
1975 case 3:
1976 if (!dc_isar_feature(aa64_pauth, s)) {
1977 goto do_unallocated;
1978 }
1979 if (opc == 2) {
1980 /* RETAA, RETAB */
1981 if (rn != 0x1f || op4 != 0x1f) {
1982 goto do_unallocated;
1983 }
1984 rn = 30;
1985 modifier = cpu_X[31];
1986 } else {
1987 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
1988 if (op4 != 0x1f) {
1989 goto do_unallocated;
1990 }
1991 modifier = new_tmp_a64_zero(s);
1992 }
1993 if (s->pauth_active) {
1994 dst = new_tmp_a64(s);
1995 if (op3 == 2) {
1996 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
1997 } else {
1998 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
1999 }
2000 } else {
2001 dst = cpu_reg(s, rn);
2002 }
2003 break;
2004
2005 default:
2006 goto do_unallocated;
2007 }
2008 gen_a64_set_pc(s, dst);
2009 /* BLR must also write the return address to LR */
2010 if (opc == 1) {
2011 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2012 }
2013 break;
2014
2015 case 8: /* BRAA */
2016 case 9: /* BLRAA */
2017 if (!dc_isar_feature(aa64_pauth, s)) {
2018 goto do_unallocated;
2019 }
2020 if ((op3 & ~1) != 2) {
2021 goto do_unallocated;
2022 }
2023 btype_mod = opc & 1;
2024 if (s->pauth_active) {
2025 dst = new_tmp_a64(s);
2026 modifier = cpu_reg_sp(s, op4);
2027 if (op3 == 2) {
2028 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2029 } else {
2030 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2031 }
2032 } else {
2033 dst = cpu_reg(s, rn);
2034 }
2035 gen_a64_set_pc(s, dst);
2036 /* BLRAA must also write the return address to LR */
2037 if (opc == 9) {
2038 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2039 }
2040 break;
2041
2042 case 4: /* ERET */
2043 if (s->current_el == 0) {
2044 goto do_unallocated;
2045 }
2046 switch (op3) {
2047 case 0: /* ERET */
2048 if (op4 != 0) {
2049 goto do_unallocated;
2050 }
2051 dst = tcg_temp_new_i64();
2052 tcg_gen_ld_i64(dst, cpu_env,
2053 offsetof(CPUARMState, elr_el[s->current_el]));
2054 break;
2055
2056 case 2: /* ERETAA */
2057 case 3: /* ERETAB */
2058 if (!dc_isar_feature(aa64_pauth, s)) {
2059 goto do_unallocated;
2060 }
2061 if (rn != 0x1f || op4 != 0x1f) {
2062 goto do_unallocated;
2063 }
2064 dst = tcg_temp_new_i64();
2065 tcg_gen_ld_i64(dst, cpu_env,
2066 offsetof(CPUARMState, elr_el[s->current_el]));
2067 if (s->pauth_active) {
2068 modifier = cpu_X[31];
2069 if (op3 == 2) {
2070 gen_helper_autia(dst, cpu_env, dst, modifier);
2071 } else {
2072 gen_helper_autib(dst, cpu_env, dst, modifier);
2073 }
2074 }
2075 break;
2076
2077 default:
2078 goto do_unallocated;
2079 }
2080 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2081 gen_io_start();
2082 }
2083
2084 gen_helper_exception_return(cpu_env, dst);
2085 tcg_temp_free_i64(dst);
2086 /* Must exit loop to check unmasked IRQs */
2087 s->base.is_jmp = DISAS_EXIT;
2088 return;
2089
2090 case 5: /* DRPS */
2091 if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2092 goto do_unallocated;
2093 } else {
2094 unsupported_encoding(s, insn);
2095 }
2096 return;
2097
2098 default:
2099 do_unallocated:
2100 unallocated_encoding(s);
2101 return;
2102 }
2103
2104 switch (btype_mod) {
2105 case 0: /* BR */
2106 if (dc_isar_feature(aa64_bti, s)) {
2107 /* BR to {x16,x17} or !guard -> 1, else 3. */
2108 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2109 }
2110 break;
2111
2112 case 1: /* BLR */
2113 if (dc_isar_feature(aa64_bti, s)) {
2114 /* BLR sets BTYPE to 2, regardless of source guarded page. */
2115 set_btype(s, 2);
2116 }
2117 break;
2118
2119 default: /* RET or none of the above. */
2120 /* BTYPE will be set to 0 by normal end-of-insn processing. */
2121 break;
2122 }
2123
2124 s->base.is_jmp = DISAS_JUMP;
2125 }
2126
2127 /* Branches, exception generating and system instructions */
2128 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2129 {
2130 switch (extract32(insn, 25, 7)) {
2131 case 0x0a: case 0x0b:
2132 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2133 disas_uncond_b_imm(s, insn);
2134 break;
2135 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2136 disas_comp_b_imm(s, insn);
2137 break;
2138 case 0x1b: case 0x5b: /* Test & branch (immediate) */
2139 disas_test_b_imm(s, insn);
2140 break;
2141 case 0x2a: /* Conditional branch (immediate) */
2142 disas_cond_b_imm(s, insn);
2143 break;
2144 case 0x6a: /* Exception generation / System */
2145 if (insn & (1 << 24)) {
2146 if (extract32(insn, 22, 2) == 0) {
2147 disas_system(s, insn);
2148 } else {
2149 unallocated_encoding(s);
2150 }
2151 } else {
2152 disas_exc(s, insn);
2153 }
2154 break;
2155 case 0x6b: /* Unconditional branch (register) */
2156 disas_uncond_b_reg(s, insn);
2157 break;
2158 default:
2159 unallocated_encoding(s);
2160 break;
2161 }
2162 }
2163
2164 /*
2165 * Load/Store exclusive instructions are implemented by remembering
2166 * the value/address loaded, and seeing if these are the same
2167 * when the store is performed. This is not actually the architecturally
2168 * mandated semantics, but it works for typical guest code sequences
2169 * and avoids having to monitor regular stores.
2170 *
2171 * The store exclusive uses the atomic cmpxchg primitives to avoid
2172 * races in multi-threaded linux-user and when MTTCG softmmu is
2173 * enabled.
2174 */
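
/*
 * A minimal sketch of that scheme in plain C (an editorial illustration;
 * the names and the use of C11 atomics are assumptions, not QEMU API):
 * the load records the address and value, and the store succeeds only if
 * a cmpxchg against the recorded value still sees it in memory.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static uint64_t exclusive_addr = (uint64_t)-1;
static uint64_t exclusive_val;

static uint64_t model_ldxr(_Atomic uint64_t *mem, uint64_t addr)
{
    exclusive_val = atomic_load(mem);
    exclusive_addr = addr;
    return exclusive_val;
}

/* Returns 0 on success, 1 on failure, matching STXR's status result. */
static int model_stxr(_Atomic uint64_t *mem, uint64_t addr, uint64_t val)
{
    int status = 1;
    if (addr == exclusive_addr) {
        uint64_t expected = exclusive_val;
        if (atomic_compare_exchange_strong(mem, &expected, val)) {
            status = 0;
        }
    }
    exclusive_addr = (uint64_t)-1;   /* monitor is cleared either way */
    return status;
}
#endif
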
2175 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2176 TCGv_i64 addr, int size, bool is_pair)
2177 {
2178 int idx = get_mem_index(s);
2179 MemOp memop = s->be_data;
2180
2181 g_assert(size <= 3);
2182 if (is_pair) {
2183 g_assert(size >= 2);
2184 if (size == 2) {
2185 /* The pair must be single-copy atomic for the doubleword. */
2186 memop |= MO_64 | MO_ALIGN;
2187 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2188 if (s->be_data == MO_LE) {
2189 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2190 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2191 } else {
2192 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2193 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2194 }
2195 } else {
2196 /* The pair must be single-copy atomic for *each* doubleword, not
2197 * the entire quadword; it must, however, be quadword aligned. */
2198 memop |= MO_64;
2199 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2200 memop | MO_ALIGN_16);
2201
2202 TCGv_i64 addr2 = tcg_temp_new_i64();
2203 tcg_gen_addi_i64(addr2, addr, 8);
2204 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2205 tcg_temp_free_i64(addr2);
2206
2207 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2208 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2209 }
2210 } else {
2211 memop |= size | MO_ALIGN;
2212 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2213 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2214 }
2215 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2216 }
2217
2218 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2219 TCGv_i64 addr, int size, int is_pair)
2220 {
2221 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2222 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2223 * [addr] = {Rt};
2224 * if (is_pair) {
2225 * [addr + datasize] = {Rt2};
2226 * }
2227 * {Rd} = 0;
2228 * } else {
2229 * {Rd} = 1;
2230 * }
2231 * env->exclusive_addr = -1;
2232 */
2233 TCGLabel *fail_label = gen_new_label();
2234 TCGLabel *done_label = gen_new_label();
2235 TCGv_i64 tmp;
2236
2237 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2238
2239 tmp = tcg_temp_new_i64();
2240 if (is_pair) {
2241 if (size == 2) {
2242 if (s->be_data == MO_LE) {
2243 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2244 } else {
2245 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2246 }
2247 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2248 cpu_exclusive_val, tmp,
2249 get_mem_index(s),
2250 MO_64 | MO_ALIGN | s->be_data);
2251 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2252 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2253 if (!HAVE_CMPXCHG128) {
2254 gen_helper_exit_atomic(cpu_env);
2255 s->base.is_jmp = DISAS_NORETURN;
2256 } else if (s->be_data == MO_LE) {
2257 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2258 cpu_exclusive_addr,
2259 cpu_reg(s, rt),
2260 cpu_reg(s, rt2));
2261 } else {
2262 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2263 cpu_exclusive_addr,
2264 cpu_reg(s, rt),
2265 cpu_reg(s, rt2));
2266 }
2267 } else if (s->be_data == MO_LE) {
2268 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2269 cpu_reg(s, rt), cpu_reg(s, rt2));
2270 } else {
2271 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2272 cpu_reg(s, rt), cpu_reg(s, rt2));
2273 }
2274 } else {
2275 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2276 cpu_reg(s, rt), get_mem_index(s),
2277 size | MO_ALIGN | s->be_data);
2278 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2279 }
2280 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2281 tcg_temp_free_i64(tmp);
2282 tcg_gen_br(done_label);
2283
2284 gen_set_label(fail_label);
2285 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2286 gen_set_label(done_label);
2287 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2288 }
2289
2290 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2291 int rn, int size)
2292 {
2293 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2294 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2295 int memidx = get_mem_index(s);
2296 TCGv_i64 clean_addr;
2297
2298 if (rn == 31) {
2299 gen_check_sp_alignment(s);
2300 }
2301 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2302 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2303 size | MO_ALIGN | s->be_data);
2304 }
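
/*
 * Sketch (an editorial illustration using C11 atomics, not QEMU API) of
 * what the cmpxchg above means at the register level: Rs supplies the
 * compare value and always receives the old memory contents, whether or
 * not the swap happened.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static uint64_t model_cas64(_Atomic uint64_t *mem, uint64_t rs, uint64_t rt)
{
    uint64_t expected = rs;
    atomic_compare_exchange_strong(mem, &expected, rt);
    return expected;   /* new value of Rs: the old [mem] */
}
#endif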
2305
2306 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2307 int rn, int size)
2308 {
2309 TCGv_i64 s1 = cpu_reg(s, rs);
2310 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2311 TCGv_i64 t1 = cpu_reg(s, rt);
2312 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2313 TCGv_i64 clean_addr;
2314 int memidx = get_mem_index(s);
2315
2316 if (rn == 31) {
2317 gen_check_sp_alignment(s);
2318 }
2319 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2320
2321 if (size == 2) {
2322 TCGv_i64 cmp = tcg_temp_new_i64();
2323 TCGv_i64 val = tcg_temp_new_i64();
2324
2325 if (s->be_data == MO_LE) {
2326 tcg_gen_concat32_i64(val, t1, t2);
2327 tcg_gen_concat32_i64(cmp, s1, s2);
2328 } else {
2329 tcg_gen_concat32_i64(val, t2, t1);
2330 tcg_gen_concat32_i64(cmp, s2, s1);
2331 }
2332
2333 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2334 MO_64 | MO_ALIGN | s->be_data);
2335 tcg_temp_free_i64(val);
2336
2337 if (s->be_data == MO_LE) {
2338 tcg_gen_extr32_i64(s1, s2, cmp);
2339 } else {
2340 tcg_gen_extr32_i64(s2, s1, cmp);
2341 }
2342 tcg_temp_free_i64(cmp);
2343 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2344 if (HAVE_CMPXCHG128) {
2345 TCGv_i32 tcg_rs = tcg_const_i32(rs);
2346 if (s->be_data == MO_LE) {
2347 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2348 clean_addr, t1, t2);
2349 } else {
2350 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2351 clean_addr, t1, t2);
2352 }
2353 tcg_temp_free_i32(tcg_rs);
2354 } else {
2355 gen_helper_exit_atomic(cpu_env);
2356 s->base.is_jmp = DISAS_NORETURN;
2357 }
2358 } else {
2359 TCGv_i64 d1 = tcg_temp_new_i64();
2360 TCGv_i64 d2 = tcg_temp_new_i64();
2361 TCGv_i64 a2 = tcg_temp_new_i64();
2362 TCGv_i64 c1 = tcg_temp_new_i64();
2363 TCGv_i64 c2 = tcg_temp_new_i64();
2364 TCGv_i64 zero = tcg_const_i64(0);
2365
2366 /* Load the two words, in memory order. */
2367 tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2368 MO_64 | MO_ALIGN_16 | s->be_data);
2369 tcg_gen_addi_i64(a2, clean_addr, 8);
2370 tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2371
2372 /* Compare the two words, also in memory order. */
2373 tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2374 tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2375 tcg_gen_and_i64(c2, c2, c1);
2376
2377 /* If compare equal, write back new data, else write back old data. */
2378 tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2379 tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2380 tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2381 tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2382 tcg_temp_free_i64(a2);
2383 tcg_temp_free_i64(c1);
2384 tcg_temp_free_i64(c2);
2385 tcg_temp_free_i64(zero);
2386
2387 /* Write back the data from memory to Rs. */
2388 tcg_gen_mov_i64(s1, d1);
2389 tcg_gen_mov_i64(s2, d2);
2390 tcg_temp_free_i64(d1);
2391 tcg_temp_free_i64(d2);
2392 }
2393 }
2394
2395 /* Compute the Sixty-Four bit (SF) register size field. This logic is derived
2396 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2397 */
2398 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2399 {
2400 int opc0 = extract32(opc, 0, 1);
2401 int regsize;
2402
2403 if (is_signed) {
2404 regsize = opc0 ? 32 : 64;
2405 } else {
2406 regsize = size == 3 ? 64 : 32;
2407 }
2408 return regsize == 64;
2409 }
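
/*
 * Illustrative values, derived by hand from the function above (assumes
 * <assert.h>): LDRH (size=1, unsigned) targets a 32-bit register, while
 * LDRSW (size=2, signed, opc=2 so opc<0>=0) targets a 64-bit one.
 */
#if 0
static void demo_iss_sf(void)
{
    assert(!disas_ldst_compute_iss_sf(1, false, 1));   /* LDRH  -> Wt */
    assert(disas_ldst_compute_iss_sf(2, true, 2));     /* LDRSW -> Xt */
    assert(disas_ldst_compute_iss_sf(3, false, 1));    /* LDR   -> Xt */
}
#endif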
2410
2411 /* Load/store exclusive
2412 *
2413 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
2414 * +-----+-------------+----+---+----+------+----+-------+------+------+
2415 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
2416 * +-----+-------------+----+---+----+------+----+-------+------+------+
2417 *
2418 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2419 * L: 0 -> store, 1 -> load
2420 * o2: 0 -> exclusive, 1 -> not
2421 * o1: 0 -> single register, 1 -> register pair
2422 * o0: 1 -> load-acquire/store-release, 0 -> not
2423 */
2424 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2425 {
2426 int rt = extract32(insn, 0, 5);
2427 int rn = extract32(insn, 5, 5);
2428 int rt2 = extract32(insn, 10, 5);
2429 int rs = extract32(insn, 16, 5);
2430 int is_lasr = extract32(insn, 15, 1);
2431 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2432 int size = extract32(insn, 30, 2);
2433 TCGv_i64 clean_addr;
2434
2435 switch (o2_L_o1_o0) {
2436 case 0x0: /* STXR */
2437 case 0x1: /* STLXR */
2438 if (rn == 31) {
2439 gen_check_sp_alignment(s);
2440 }
2441 if (is_lasr) {
2442 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2443 }
2444 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2445 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2446 return;
2447
2448 case 0x4: /* LDXR */
2449 case 0x5: /* LDAXR */
2450 if (rn == 31) {
2451 gen_check_sp_alignment(s);
2452 }
2453 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2454 s->is_ldex = true;
2455 gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2456 if (is_lasr) {
2457 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2458 }
2459 return;
2460
2461 case 0x8: /* STLLR */
2462 if (!dc_isar_feature(aa64_lor, s)) {
2463 break;
2464 }
2465 /* StoreLORelease is the same as Store-Release for QEMU. */
2466 /* fall through */
2467 case 0x9: /* STLR */
2468 /* Generate ISS for non-exclusive accesses including LASR. */
2469 if (rn == 31) {
2470 gen_check_sp_alignment(s);
2471 }
2472 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2473 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2474 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2475 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2476 return;
2477
2478 case 0xc: /* LDLAR */
2479 if (!dc_isar_feature(aa64_lor, s)) {
2480 break;
2481 }
2482 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2483 /* fall through */
2484 case 0xd: /* LDAR */
2485 /* Generate ISS for non-exclusive accesses including LASR. */
2486 if (rn == 31) {
2487 gen_check_sp_alignment(s);
2488 }
2489 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2490 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2491 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2492 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2493 return;
2494
2495 case 0x2: case 0x3: /* CASP / STXP */
2496 if (size & 2) { /* STXP / STLXP */
2497 if (rn == 31) {
2498 gen_check_sp_alignment(s);
2499 }
2500 if (is_lasr) {
2501 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2502 }
2503 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2504 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2505 return;
2506 }
2507 if (rt2 == 31
2508 && ((rt | rs) & 1) == 0
2509 && dc_isar_feature(aa64_atomics, s)) {
2510 /* CASP / CASPL */
2511 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2512 return;
2513 }
2514 break;
2515
2516 case 0x6: case 0x7: /* CASPA / LDXP */
2517 if (size & 2) { /* LDXP / LDAXP */
2518 if (rn == 31) {
2519 gen_check_sp_alignment(s);
2520 }
2521 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2522 s->is_ldex = true;
2523 gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2524 if (is_lasr) {
2525 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2526 }
2527 return;
2528 }
2529 if (rt2 == 31
2530 && ((rt | rs) & 1) == 0
2531 && dc_isar_feature(aa64_atomics, s)) {
2532 /* CASPA / CASPAL */
2533 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2534 return;
2535 }
2536 break;
2537
2538 case 0xa: /* CAS */
2539 case 0xb: /* CASL */
2540 case 0xe: /* CASA */
2541 case 0xf: /* CASAL */
2542 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2543 gen_compare_and_swap(s, rs, rt, rn, size);
2544 return;
2545 }
2546 break;
2547 }
2548 unallocated_encoding(s);
2549 }
2550
2551 /*
2552 * Load register (literal)
2553 *
2554 * 31 30 29 27 26 25 24 23 5 4 0
2555 * +-----+-------+---+-----+-------------------+-------+
2556 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2557 * +-----+-------+---+-----+-------------------+-------+
2558 *
2559 * V: 1 -> vector (simd/fp)
2560 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2561 * 10 -> 32 bit signed, 11 -> prefetch
2562 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2563 */
2564 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2565 {
2566 int rt = extract32(insn, 0, 5);
2567 int64_t imm = sextract32(insn, 5, 19) << 2;
2568 bool is_vector = extract32(insn, 26, 1);
2569 int opc = extract32(insn, 30, 2);
2570 bool is_signed = false;
2571 int size = 2;
2572 TCGv_i64 tcg_rt, clean_addr;
2573
2574 if (is_vector) {
2575 if (opc == 3) {
2576 unallocated_encoding(s);
2577 return;
2578 }
2579 size = 2 + opc;
2580 if (!fp_access_check(s)) {
2581 return;
2582 }
2583 } else {
2584 if (opc == 3) {
2585 /* PRFM (literal) : prefetch */
2586 return;
2587 }
2588 size = 2 + extract32(opc, 0, 1);
2589 is_signed = extract32(opc, 1, 1);
2590 }
2591
2592 tcg_rt = cpu_reg(s, rt);
2593
2594 clean_addr = tcg_const_i64(s->pc_curr + imm);
2595 if (is_vector) {
2596 do_fp_ld(s, rt, clean_addr, size);
2597 } else {
2598 /* Only unsigned 32-bit loads target 32-bit registers. */
2599 bool iss_sf = opc != 0;
2600
2601 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2602 true, rt, iss_sf, false);
2603 }
2604 tcg_temp_free_i64(clean_addr);
2605 }
2606
2607 /*
2608 * LDNP (Load Pair - non-temporal hint)
2609 * LDP (Load Pair - non vector)
2610 * LDPSW (Load Pair Signed Word - non vector)
2611 * STNP (Store Pair - non-temporal hint)
2612 * STP (Store Pair - non vector)
2613 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2614 * LDP (Load Pair of SIMD&FP)
2615 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2616 * STP (Store Pair of SIMD&FP)
2617 *
2618 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
2619 * +-----+-------+---+---+-------+---+-----------------------------+
2620 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
2621 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2622 *
2623 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2624 * LDPSW 01
2625 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2626 * V: 0 -> GPR, 1 -> Vector
2627 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2628 * 10 -> signed offset, 11 -> pre-index
2629 * L: 0 -> Store 1 -> Load
2630 *
2631 * Rt, Rt2 = GPR or SIMD registers to be transferred
2632 * Rn = general purpose register containing address
2633 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2634 */
2635 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2636 {
2637 int rt = extract32(insn, 0, 5);
2638 int rn = extract32(insn, 5, 5);
2639 int rt2 = extract32(insn, 10, 5);
2640 uint64_t offset = sextract64(insn, 15, 7);
2641 int index = extract32(insn, 23, 2);
2642 bool is_vector = extract32(insn, 26, 1);
2643 bool is_load = extract32(insn, 22, 1);
2644 int opc = extract32(insn, 30, 2);
2645
2646 bool is_signed = false;
2647 bool postindex = false;
2648 bool wback = false;
2649
2650 TCGv_i64 clean_addr, dirty_addr;
2651
2652 int size;
2653
2654 if (opc == 3) {
2655 unallocated_encoding(s);
2656 return;
2657 }
2658
2659 if (is_vector) {
2660 size = 2 + opc;
2661 } else {
2662 size = 2 + extract32(opc, 1, 1);
2663 is_signed = extract32(opc, 0, 1);
2664 if (!is_load && is_signed) {
2665 unallocated_encoding(s);
2666 return;
2667 }
2668 }
2669
2670 switch (index) {
2671 case 1: /* post-index */
2672 postindex = true;
2673 wback = true;
2674 break;
2675 case 0:
2676 /* signed offset with "non-temporal" hint. Since we don't emulate
2677 * caches we don't care about hints to the cache system about
2678 * data access patterns, and handle this identically to plain
2679 * signed offset.
2680 */
2681 if (is_signed) {
2682 /* There is no non-temporal-hint version of LDPSW */
2683 unallocated_encoding(s);
2684 return;
2685 }
2686 postindex = false;
2687 break;
2688 case 2: /* signed offset, rn not updated */
2689 postindex = false;
2690 break;
2691 case 3: /* pre-index */
2692 postindex = false;
2693 wback = true;
2694 break;
2695 }
2696
2697 if (is_vector && !fp_access_check(s)) {
2698 return;
2699 }
2700
2701 offset <<= size;
2702
2703 if (rn == 31) {
2704 gen_check_sp_alignment(s);
2705 }
2706
2707 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2708 if (!postindex) {
2709 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2710 }
2711 clean_addr = clean_data_tbi(s, dirty_addr);
2712
2713 if (is_vector) {
2714 if (is_load) {
2715 do_fp_ld(s, rt, clean_addr, size);
2716 } else {
2717 do_fp_st(s, rt, clean_addr, size);
2718 }
2719 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2720 if (is_load) {
2721 do_fp_ld(s, rt2, clean_addr, size);
2722 } else {
2723 do_fp_st(s, rt2, clean_addr, size);
2724 }
2725 } else {
2726 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2727 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2728
2729 if (is_load) {
2730 TCGv_i64 tmp = tcg_temp_new_i64();
2731
2732 /* Do not modify tcg_rt before recognizing any exception
2733 * from the second load.
2734 */
2735 do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2736 false, 0, false, false);
2737 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2738 do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2739 false, 0, false, false);
2740
2741 tcg_gen_mov_i64(tcg_rt, tmp);
2742 tcg_temp_free_i64(tmp);
2743 } else {
2744 do_gpr_st(s, tcg_rt, clean_addr, size,
2745 false, 0, false, false);
2746 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2747 do_gpr_st(s, tcg_rt2, clean_addr, size,
2748 false, 0, false, false);
2749 }
2750 }
2751
2752 if (wback) {
2753 if (postindex) {
2754 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2755 }
2756 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2757 }
2758 }
2759
2760 /*
2761 * Load/store (immediate post-indexed)
2762 * Load/store (immediate pre-indexed)
2763 * Load/store (unscaled immediate)
2764 *
2765 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2766 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2767 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2768 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2769 *
2770 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
2771 * 10 -> unprivileged
2772 * V = 0 -> non-vector
2773 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2774 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2775 */
2776 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2777 int opc,
2778 int size,
2779 int rt,
2780 bool is_vector)
2781 {
2782 int rn = extract32(insn, 5, 5);
2783 int imm9 = sextract32(insn, 12, 9);
2784 int idx = extract32(insn, 10, 2);
2785 bool is_signed = false;
2786 bool is_store = false;
2787 bool is_extended = false;
2788 bool is_unpriv = (idx == 2);
2789 bool iss_valid = !is_vector;
2790 bool post_index;
2791 bool writeback;
2792
2793 TCGv_i64 clean_addr, dirty_addr;
2794
2795 if (is_vector) {
2796 size |= (opc & 2) << 1;
2797 if (size > 4 || is_unpriv) {
2798 unallocated_encoding(s);
2799 return;
2800 }
2801 is_store = ((opc & 1) == 0);
2802 if (!fp_access_check(s)) {
2803 return;
2804 }
2805 } else {
2806 if (size == 3 && opc == 2) {
2807 /* PRFM - prefetch */
2808 if (idx != 0) {
2809 unallocated_encoding(s);
2810 return;
2811 }
2812 return;
2813 }
2814 if (opc == 3 && size > 1) {
2815 unallocated_encoding(s);
2816 return;
2817 }
2818 is_store = (opc == 0);
2819 is_signed = extract32(opc, 1, 1);
2820 is_extended = (size < 3) && extract32(opc, 0, 1);
2821 }
2822
2823 switch (idx) {
2824 case 0:
2825 case 2:
2826 post_index = false;
2827 writeback = false;
2828 break;
2829 case 1:
2830 post_index = true;
2831 writeback = true;
2832 break;
2833 case 3:
2834 post_index = false;
2835 writeback = true;
2836 break;
2837 default:
2838 g_assert_not_reached();
2839 }
2840
2841 if (rn == 31) {
2842 gen_check_sp_alignment(s);
2843 }
2844
2845 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2846 if (!post_index) {
2847 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2848 }
2849 clean_addr = clean_data_tbi(s, dirty_addr);
2850
2851 if (is_vector) {
2852 if (is_store) {
2853 do_fp_st(s, rt, clean_addr, size);
2854 } else {
2855 do_fp_ld(s, rt, clean_addr, size);
2856 }
2857 } else {
2858 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2859 int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2860 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2861
2862 if (is_store) {
2863 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2864 iss_valid, rt, iss_sf, false);
2865 } else {
2866 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2867 is_signed, is_extended, memidx,
2868 iss_valid, rt, iss_sf, false);
2869 }
2870 }
2871
2872 if (writeback) {
2873 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2874 if (post_index) {
2875 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2876 }
2877 tcg_gen_mov_i64(tcg_rn, dirty_addr);
2878 }
2879 }
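
/*
 * The three writeback modes above, condensed into a plain-C sketch (an
 * editorial illustration; the names are assumptions): the modes differ
 * only in which address the access uses and whether the base register
 * is updated afterwards.
 */
#if 0
uint64_t effective = post_index ? base : base + imm9;
/* ... perform the load/store at 'effective' ... */
if (writeback) {
    base += imm9;   /* pre- and post-index agree on the final base */
}
#endif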
2880
2881 /*
2882 * Load/store (register offset)
2883 *
2884 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2885 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2886 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2887 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2888 *
2889 * For non-vector:
2890 * size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2891 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2892 * For vector:
2893 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2894 * opc<0>: 0 -> store, 1 -> load
2895 * V: 1 -> vector/simd
2896 * opt: extend encoding (see DecodeRegExtend)
2897 * S: if S=1 then scale the offset by the access size (LSL #size)
2898 * Rt: register to transfer into/out of
2899 * Rn: address register or SP for base
2900 * Rm: offset register or ZR for offset
2901 */
2902 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2903 int opc,
2904 int size,
2905 int rt,
2906 bool is_vector)
2907 {
2908 int rn = extract32(insn, 5, 5);
2909 int shift = extract32(insn, 12, 1);
2910 int rm = extract32(insn, 16, 5);
2911 int opt = extract32(insn, 13, 3);
2912 bool is_signed = false;
2913 bool is_store = false;
2914 bool is_extended = false;
2915
2916 TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2917
2918 if (extract32(opt, 1, 1) == 0) {
2919 unallocated_encoding(s);
2920 return;
2921 }
2922
2923 if (is_vector) {
2924 size |= (opc & 2) << 1;
2925 if (size > 4) {
2926 unallocated_encoding(s);
2927 return;
2928 }
2929 is_store = !extract32(opc, 0, 1);
2930 if (!fp_access_check(s)) {
2931 return;
2932 }
2933 } else {
2934 if (size == 3 && opc == 2) {
2935 /* PRFM - prefetch */
2936 return;
2937 }
2938 if (opc == 3 && size > 1) {
2939 unallocated_encoding(s);
2940 return;
2941 }
2942 is_store = (opc == 0);
2943 is_signed = extract32(opc, 1, 1);
2944 is_extended = (size < 3) && extract32(opc, 0, 1);
2945 }
2946
2947 if (rn == 31) {
2948 gen_check_sp_alignment(s);
2949 }
2950 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2951
2952 tcg_rm = read_cpu_reg(s, rm, 1);
2953 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2954
2955 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2956 clean_addr = clean_data_tbi(s, dirty_addr);
2957
2958 if (is_vector) {
2959 if (is_store) {
2960 do_fp_st(s, rt, clean_addr, size);
2961 } else {
2962 do_fp_ld(s, rt, clean_addr, size);
2963 }
2964 } else {
2965 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2966 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2967 if (is_store) {
2968 do_gpr_st(s, tcg_rt, clean_addr, size,
2969 true, rt, iss_sf, false);
2970 } else {
2971 do_gpr_ld(s, tcg_rt, clean_addr, size,
2972 is_signed, is_extended,
2973 true, rt, iss_sf, false);
2974 }
2975 }
2976 }
2977
2978 /*
2979 * Load/store (unsigned immediate)
2980 *
2981 * 31 30 29 27 26 25 24 23 22 21 10 9 5 4 0
2982 * +----+-------+---+-----+-----+------------+-------+------+
2983 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2984 * +----+-------+---+-----+-----+------------+-------+------+
2985 *
2986 * For non-vector:
2987 * size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2988 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2989 * For vector:
2990 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2991 * opc<0>: 0 -> store, 1 -> load
2992 * Rn: base address register (inc SP)
2993 * Rt: target register
2994 */
2995 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2996 int opc,
2997 int size,
2998 int rt,
2999 bool is_vector)
3000 {
3001 int rn = extract32(insn, 5, 5);
3002 unsigned int imm12 = extract32(insn, 10, 12);
3003 unsigned int offset;
3004
3005 TCGv_i64 clean_addr, dirty_addr;
3006
3007 bool is_store;
3008 bool is_signed = false;
3009 bool is_extended = false;
3010
3011 if (is_vector) {
3012 size |= (opc & 2) << 1;
3013 if (size > 4) {
3014 unallocated_encoding(s);
3015 return;
3016 }
3017 is_store = !extract32(opc, 0, 1);
3018 if (!fp_access_check(s)) {
3019 return;
3020 }
3021 } else {
3022 if (size == 3 && opc == 2) {
3023 /* PRFM - prefetch */
3024 return;
3025 }
3026 if (opc == 3 && size > 1) {
3027 unallocated_encoding(s);
3028 return;
3029 }
3030 is_store = (opc == 0);
3031 is_signed = extract32(opc, 1, 1);
3032 is_extended = (size < 3) && extract32(opc, 0, 1);
3033 }
3034
3035 if (rn == 31) {
3036 gen_check_sp_alignment(s);
3037 }
3038 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3039 offset = imm12 << size;
3040 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3041 clean_addr = clean_data_tbi(s, dirty_addr);
3042
3043 if (is_vector) {
3044 if (is_store) {
3045 do_fp_st(s, rt, clean_addr, size);
3046 } else {
3047 do_fp_ld(s, rt, clean_addr, size);
3048 }
3049 } else {
3050 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3051 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3052 if (is_store) {
3053 do_gpr_st(s, tcg_rt, clean_addr, size,
3054 true, rt, iss_sf, false);
3055 } else {
3056 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3057 true, rt, iss_sf, false);
3058 }
3059 }
3060 }
3061
3062 /* Atomic memory operations
3063 *
3064 * 31 30 27 26 24 22 21 16 15 12 10 5 0
3065 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3066 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
3067 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3068 *
3069 * Rt: the result register
3070 * Rn: base address or SP
3071 * Rs: the source register for the operation
3072 * V: vector flag (always 0 as of v8.3)
3073 * A: acquire flag
3074 * R: release flag
3075 */
3076 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3077 int size, int rt, bool is_vector)
3078 {
3079 int rs = extract32(insn, 16, 5);
3080 int rn = extract32(insn, 5, 5);
3081 int o3_opc = extract32(insn, 12, 4);
3082 TCGv_i64 tcg_rs, clean_addr;
3083 AtomicThreeOpFn *fn;
3084
3085 if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3086 unallocated_encoding(s);
3087 return;
3088 }
3089 switch (o3_opc) {
3090 case 000: /* LDADD */
3091 fn = tcg_gen_atomic_fetch_add_i64;
3092 break;
3093 case 001: /* LDCLR */
3094 fn = tcg_gen_atomic_fetch_and_i64;
3095 break;
3096 case 002: /* LDEOR */
3097 fn = tcg_gen_atomic_fetch_xor_i64;
3098 break;
3099 case 003: /* LDSET */
3100 fn = tcg_gen_atomic_fetch_or_i64;
3101 break;
3102 case 004: /* LDSMAX */
3103 fn = tcg_gen_atomic_fetch_smax_i64;
3104 break;
3105 case 005: /* LDSMIN */
3106 fn = tcg_gen_atomic_fetch_smin_i64;
3107 break;
3108 case 006: /* LDUMAX */
3109 fn = tcg_gen_atomic_fetch_umax_i64;
3110 break;
3111 case 007: /* LDUMIN */
3112 fn = tcg_gen_atomic_fetch_umin_i64;
3113 break;
3114 case 010: /* SWP */
3115 fn = tcg_gen_atomic_xchg_i64;
3116 break;
3117 default:
3118 unallocated_encoding(s);
3119 return;
3120 }
3121
3122 if (rn == 31) {
3123 gen_check_sp_alignment(s);
3124 }
3125 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3126 tcg_rs = read_cpu_reg(s, rs, true);
3127
3128 if (o3_opc == 1) { /* LDCLR */
3129 tcg_gen_not_i64(tcg_rs, tcg_rs);
3130 }
3131
3132 /* The tcg atomic primitives are all full barriers. Therefore we
3133 * can ignore the Acquire and Release bits of this instruction.
3134 */
3135 fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3136 s->be_data | size | MO_ALIGN);
3137 }
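
/*
 * Why LDCLR complements Rs above: the architectural operation is a
 * bit-clear ([mem] &= ~Rs), which maps onto TCG's fetch-and once the
 * operand is inverted. A sketch with C11 atomics (an editorial
 * illustration, not QEMU API):
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static uint64_t model_ldclr64(_Atomic uint64_t *mem, uint64_t rs)
{
    return atomic_fetch_and(mem, ~rs);   /* returns the old value, i.e. Rt */
}
#endif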
3138
3139 /*
3140 * PAC memory operations
3141 *
3142 * 31 30 27 26 24 22 21 12 11 10 5 0
3143 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3144 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt |
3145 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3146 *
3147 * Rt: the result register
3148 * Rn: base address or SP
3149 * V: vector flag (always 0 as of v8.3)
3150 * M: clear for key DA, set for key DB
3151 * W: pre-indexing flag
3152 * S: sign for imm9.
3153 */
3154 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3155 int size, int rt, bool is_vector)
3156 {
3157 int rn = extract32(insn, 5, 5);
3158 bool is_wback = extract32(insn, 11, 1);
3159 bool use_key_a = !extract32(insn, 23, 1);
3160 int offset;
3161 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3162
3163 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3164 unallocated_encoding(s);
3165 return;
3166 }
3167
3168 if (rn == 31) {
3169 gen_check_sp_alignment(s);
3170 }
3171 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3172
3173 if (s->pauth_active) {
3174 if (use_key_a) {
3175 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3176 } else {
3177 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3178 }
3179 }
3180
3181 /* Form the 10-bit signed, scaled offset. */
3182 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3183 offset = sextract32(offset << size, 0, 10 + size);
3184 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3185
3186 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3187 clean_addr = clean_data_tbi(s, dirty_addr);
3188
3189 tcg_rt = cpu_reg(s, rt);
3190 do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3191 /* extend */ false, /* iss_valid */ !is_wback,
3192 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3193
3194 if (is_wback) {
3195 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3196 }
3197 }
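
/*
 * Worked example of the offset formed above (an editorial illustration):
 * for the 64-bit accesses this decoder allows (size == 3), S:imm9 = 0x200
 * scales to 0x1000 and sign-extends from bit 12 to -4096, so the
 * reachable range is -4096 .. +4088 in steps of 8.
 */
#if 0
int offset = (1 << 9) | 0;                     /* S = 1, imm9 = 0 */
offset = sextract32(offset << 3, 0, 10 + 3);   /* == -4096 */
#endif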
3198
3199 /* Load/store register (all forms) */
3200 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3201 {
3202 int rt = extract32(insn, 0, 5);
3203 int opc = extract32(insn, 22, 2);
3204 bool is_vector = extract32(insn, 26, 1);
3205 int size = extract32(insn, 30, 2);
3206
3207 switch (extract32(insn, 24, 2)) {
3208 case 0:
3209 if (extract32(insn, 21, 1) == 0) {
3210 /* Load/store register (unscaled immediate)
3211 * Load/store immediate pre/post-indexed
3212 * Load/store register unprivileged
3213 */
3214 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3215 return;
3216 }
3217 switch (extract32(insn, 10, 2)) {
3218 case 0:
3219 disas_ldst_atomic(s, insn, size, rt, is_vector);
3220 return;
3221 case 2:
3222 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3223 return;
3224 default:
3225 disas_ldst_pac(s, insn, size, rt, is_vector);
3226 return;
3227 }
3228 break;
3229 case 1:
3230 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3231 return;
3232 }
3233 unallocated_encoding(s);
3234 }
3235
3236 /* AdvSIMD load/store multiple structures
3237 *
3238 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
3239 * +---+---+---------------+---+-------------+--------+------+------+------+
3240 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
3241 * +---+---+---------------+---+-------------+--------+------+------+------+
3242 *
3243 * AdvSIMD load/store multiple structures (post-indexed)
3244 *
3245 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
3246 * +---+---+---------------+---+---+---------+--------+------+------+------+
3247 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
3248 * +---+---+---------------+---+---+---------+--------+------+------+------+
3249 *
3250 * Rt: first (or only) SIMD&FP register to be transferred
3251 * Rn: base address or SP
3252 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3253 */
3254 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3255 {
3256 int rt = extract32(insn, 0, 5);
3257 int rn = extract32(insn, 5, 5);
3258 int rm = extract32(insn, 16, 5);
3259 int size = extract32(insn, 10, 2);
3260 int opcode = extract32(insn, 12, 4);
3261 bool is_store = !extract32(insn, 22, 1);
3262 bool is_postidx = extract32(insn, 23, 1);
3263 bool is_q = extract32(insn, 30, 1);
3264 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3265 MemOp endian = s->be_data;
3266
3267 int ebytes; /* bytes per element */
3268 int elements; /* elements per vector */
3269 int rpt; /* num iterations */
3270 int selem; /* structure elements */
3271 int r;
3272
3273 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3274 unallocated_encoding(s);
3275 return;
3276 }
3277
3278 if (!is_postidx && rm != 0) {
3279 unallocated_encoding(s);
3280 return;
3281 }
3282
3283 /* From the shared decode logic */
3284 switch (opcode) {
3285 case 0x0:
3286 rpt = 1;
3287 selem = 4;
3288 break;
3289 case 0x2:
3290 rpt = 4;
3291 selem = 1;
3292 break;
3293 case 0x4:
3294 rpt = 1;
3295 selem = 3;
3296 break;
3297 case 0x6:
3298 rpt = 3;
3299 selem = 1;
3300 break;
3301 case 0x7:
3302 rpt = 1;
3303 selem = 1;
3304 break;
3305 case 0x8:
3306 rpt = 1;
3307 selem = 2;
3308 break;
3309 case 0xa:
3310 rpt = 2;
3311 selem = 1;
3312 break;
3313 default:
3314 unallocated_encoding(s);
3315 return;
3316 }
3317
3318 if (size == 3 && !is_q && selem != 1) {
3319 /* reserved */
3320 unallocated_encoding(s);
3321 return;
3322 }
3323
3324 if (!fp_access_check(s)) {
3325 return;
3326 }
3327
3328 if (rn == 31) {
3329 gen_check_sp_alignment(s);
3330 }
3331
3332 /* For our purposes, bytes are always little-endian. */
3333 if (size == 0) {
3334 endian = MO_LE;
3335 }
3336
3337 /* Consecutive little-endian elements from a single register
3338 * can be promoted to a larger little-endian operation.
3339 */
3340 if (selem == 1 && endian == MO_LE) {
3341 size = 3;
3342 }
3343 ebytes = 1 << size;
3344 elements = (is_q ? 16 : 8) / ebytes;
3345
3346 tcg_rn = cpu_reg_sp(s, rn);
3347 clean_addr = clean_data_tbi(s, tcg_rn);
3348 tcg_ebytes = tcg_const_i64(ebytes);
3349
3350 for (r = 0; r < rpt; r++) {
3351 int e;
3352 for (e = 0; e < elements; e++) {
3353 int xs;
3354 for (xs = 0; xs < selem; xs++) {
3355 int tt = (rt + r + xs) % 32;
3356 if (is_store) {
3357 do_vec_st(s, tt, e, clean_addr, size, endian);
3358 } else {
3359 do_vec_ld(s, tt, e, clean_addr, size, endian);
3360 }
3361 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3362 }
3363 }
3364 }
3365 tcg_temp_free_i64(tcg_ebytes);
3366
3367 if (!is_store) {
3368 /* For non-quad operations, setting a slice of the low
3369 * 64 bits of the register clears the high 64 bits (in
3370 * the ARM ARM pseudocode this is implicit in the fact
3371 * that 'rval' is a 64 bit wide variable).
3372 * For quad operations, we might still need to zero the
3373 * high bits of the SVE register.
3374 */
3375 for (r = 0; r < rpt * selem; r++) {
3376 int tt = (rt + r) % 32;
3377 clear_vec_high(s, is_q, tt);
3378 }
3379 }
3380
3381 if (is_postidx) {
3382 if (rm == 31) {
3383 tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3384 } else {
3385 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3386 }
3387 }
3388 }
3389
3390 /* AdvSIMD load/store single structure
3391 *
3392 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3393 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3394 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
3395 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3396 *
3397 * AdvSIMD load/store single structure (post-indexed)
3398 *
3399 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3400 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3401 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
3402 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3403 *
3404 * Rt: first (or only) SIMD&FP register to be transferred
3405 * Rn: base address or SP
3406 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3407 * index = encoded in Q:S:size dependent on size
3408 *
3409 * lane_size = encoded in R, opc
3410 * transfer width = encoded in opc, S, size
3411 */
3412 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3413 {
3414 int rt = extract32(insn, 0, 5);
3415 int rn = extract32(insn, 5, 5);
3416 int rm = extract32(insn, 16, 5);
3417 int size = extract32(insn, 10, 2);
3418 int S = extract32(insn, 12, 1);
3419 int opc = extract32(insn, 13, 3);
3420 int R = extract32(insn, 21, 1);
3421 int is_load = extract32(insn, 22, 1);
3422 int is_postidx = extract32(insn, 23, 1);
3423 int is_q = extract32(insn, 30, 1);
3424
3425 int scale = extract32(opc, 1, 2);
3426 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3427 bool replicate = false;
3428 int index = is_q << 3 | S << 2 | size;
3429 int ebytes, xs;
3430 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3431
3432 if (extract32(insn, 31, 1)) {
3433 unallocated_encoding(s);
3434 return;
3435 }
3436 if (!is_postidx && rm != 0) {
3437 unallocated_encoding(s);
3438 return;
3439 }
3440
3441 switch (scale) {
3442 case 3:
3443 if (!is_load || S) {
3444 unallocated_encoding(s);
3445 return;
3446 }
3447 scale = size;
3448 replicate = true;
3449 break;
3450 case 0:
3451 break;
3452 case 1:
3453 if (extract32(size, 0, 1)) {
3454 unallocated_encoding(s);
3455 return;
3456 }
3457 index >>= 1;
3458 break;
3459 case 2:
3460 if (extract32(size, 1, 1)) {
3461 unallocated_encoding(s);
3462 return;
3463 }
3464 if (!extract32(size, 0, 1)) {
3465 index >>= 2;
3466 } else {
3467 if (S) {
3468 unallocated_encoding(s);
3469 return;
3470 }
3471 index >>= 3;
3472 scale = 3;
3473 }
3474 break;
3475 default:
3476 g_assert_not_reached();
3477 }
3478
3479 if (!fp_access_check(s)) {
3480 return;
3481 }
3482
3483 ebytes = 1 << scale;
3484
3485 if (rn == 31) {
3486 gen_check_sp_alignment(s);
3487 }
3488
3489 tcg_rn = cpu_reg_sp(s, rn);
3490 clean_addr = clean_data_tbi(s, tcg_rn);
3491 tcg_ebytes = tcg_const_i64(ebytes);
3492
3493 for (xs = 0; xs < selem; xs++) {
3494 if (replicate) {
3495 /* Load and replicate to all elements */
3496 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3497
3498 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3499 get_mem_index(s), s->be_data + scale);
3500 tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3501 (is_q + 1) * 8, vec_full_reg_size(s),
3502 tcg_tmp);
3503 tcg_temp_free_i64(tcg_tmp);
3504 } else {
3505 /* Load/store one element per register */
3506 if (is_load) {
3507 do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3508 } else {
3509 do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3510 }
3511 }
3512 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3513 rt = (rt + 1) % 32;
3514 }
3515 tcg_temp_free_i64(tcg_ebytes);
3516
3517 if (is_postidx) {
3518 if (rm == 31) {
3519 tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3520 } else {
3521 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3522 }
3523 }
3524 }
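
/*
 * Worked example of the index decode above (an editorial illustration):
 * LD1 {Vt.h}[5] encodes the element number across Q:S:size. With opc
 * giving scale = 1 (16-bit lanes), Q=1, S=0, size=0b10 yields
 * index = (1 << 3) | (0 << 2) | 2 = 10, and the "index >>= 1" step in
 * the scale == 1 case recovers element 5.
 */
#if 0
int index = 1 << 3 | 0 << 2 | 2;   /* Q:S:size for a .h lane */
index >>= 1;                       /* == 5 */
#endif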
3525
3526 /* Loads and stores */
3527 static void disas_ldst(DisasContext *s, uint32_t insn)
3528 {
3529 switch (extract32(insn, 24, 6)) {
3530 case 0x08: /* Load/store exclusive */
3531 disas_ldst_excl(s, insn);
3532 break;
3533 case 0x18: case 0x1c: /* Load register (literal) */
3534 disas_ld_lit(s, insn);
3535 break;
3536 case 0x28: case 0x29:
3537 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3538 disas_ldst_pair(s, insn);
3539 break;
3540 case 0x38: case 0x39:
3541 case 0x3c: case 0x3d: /* Load/store register (all forms) */
3542 disas_ldst_reg(s, insn);
3543 break;
3544 case 0x0c: /* AdvSIMD load/store multiple structures */
3545 disas_ldst_multiple_struct(s, insn);
3546 break;
3547 case 0x0d: /* AdvSIMD load/store single structure */
3548 disas_ldst_single_struct(s, insn);
3549 break;
3550 default:
3551 unallocated_encoding(s);
3552 break;
3553 }
3554 }
3555
3556 /* PC-rel. addressing
3557 * 31 30 29 28 24 23 5 4 0
3558 * +----+-------+-----------+-------------------+------+
3559 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
3560 * +----+-------+-----------+-------------------+------+
3561 */
3562 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3563 {
3564 unsigned int page, rd;
3565 uint64_t base;
3566 uint64_t offset;
3567
3568 page = extract32(insn, 31, 1);
3569 /* SignExtend(immhi:immlo) -> offset */
3570 offset = sextract64(insn, 5, 19);
3571 offset = offset << 2 | extract32(insn, 29, 2);
3572 rd = extract32(insn, 0, 5);
3573 base = s->pc_curr;
3574
3575 if (page) {
3576 /* ADRP (page based) */
3577 base &= ~0xfff;
3578 offset <<= 12;
3579 }
3580
3581 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3582 }
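
/*
 * The two target computations above as a standalone sketch (an editorial
 * illustration; function names are assumptions): ADR is pc-relative to
 * the instruction, ADRP to its 4KB page.
 */
#if 0
#include <stdint.h>

static uint64_t adr_target(uint64_t pc, int64_t imm21)
{
    return pc + imm21;
}

static uint64_t adrp_target(uint64_t pc, int64_t imm21)
{
    return (pc & ~0xfffull) + (imm21 << 12);
}
/* e.g. adrp_target(0x1234, 1) == 0x2000 */
#endif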
3583
3584 /*
3585 * Add/subtract (immediate)
3586 *
3587 * 31 30 29 28 24 23 22 21 10 9 5 4 0
3588 * +--+--+--+-----------+-----+-------------+-----+-----+
3589 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
3590 * +--+--+--+-----------+-----+-------------+-----+-----+
3591 *
3592 * sf: 0 -> 32bit, 1 -> 64bit
3593 * op: 0 -> add , 1 -> sub
3594 * S: 1 -> set flags
3595 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3596 */
3597 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3598 {
3599 int rd = extract32(insn, 0, 5);
3600 int rn = extract32(insn, 5, 5);
3601 uint64_t imm = extract32(insn, 10, 12);
3602 int shift = extract32(insn, 22, 2);
3603 bool setflags = extract32(insn, 29, 1);
3604 bool sub_op = extract32(insn, 30, 1);
3605 bool is_64bit = extract32(insn, 31, 1);
3606
3607 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3608 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3609 TCGv_i64 tcg_result;
3610
3611 switch (shift) {
3612 case 0x0:
3613 break;
3614 case 0x1:
3615 imm <<= 12;
3616 break;
3617 default:
3618 unallocated_encoding(s);
3619 return;
3620 }
3621
3622 tcg_result = tcg_temp_new_i64();
3623 if (!setflags) {
3624 if (sub_op) {
3625 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3626 } else {
3627 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3628 }
3629 } else {
3630 TCGv_i64 tcg_imm = tcg_const_i64(imm);
3631 if (sub_op) {
3632 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3633 } else {
3634 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3635 }
3636 tcg_temp_free_i64(tcg_imm);
3637 }
3638
3639 if (is_64bit) {
3640 tcg_gen_mov_i64(tcg_rd, tcg_result);
3641 } else {
3642 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3643 }
3644
3645 tcg_temp_free_i64(tcg_result);
3646 }
3647
3648 /* The input should be a value in the bottom e bits (with higher
3649 * bits zero); returns that value replicated into every element
3650 * of size e in a 64 bit integer.
3651 */
3652 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3653 {
3654 assert(e != 0);
3655 while (e < 64) {
3656 mask |= mask << e;
3657 e *= 2;
3658 }
3659 return mask;
3660 }
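
/*
 * Example, which follows directly from the doubling loop above (assumes
 * <assert.h>): one set bit in an 8-bit element replicates across all
 * eight bytes.
 */
#if 0
assert(bitfield_replicate(0x01, 8) == 0x0101010101010101ull);
#endif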
3661
3662 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
3663 static inline uint64_t bitmask64(unsigned int length)
3664 {
3665 assert(length > 0 && length <= 64);
3666 return ~0ULL >> (64 - length);
3667 }
3668
3669 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3670 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3671 * value (ie should cause a guest UNDEF exception), and true if they are
3672 * valid, in which case the decoded bit pattern is written to result.
3673 */
3674 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3675 unsigned int imms, unsigned int immr)
3676 {
3677 uint64_t mask;
3678 unsigned e, levels, s, r;
3679 int len;
3680
3681 assert(immn < 2 && imms < 64 && immr < 64);
3682
3683 /* The bit patterns we create here are 64 bit patterns which
3684 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3685 * 64 bits each. Each element contains the same value: a run
3686 * of between 1 and e-1 non-zero bits, rotated within the
3687 * element by between 0 and e-1 bits.
3688 *
3689 * The element size and run length are encoded into immn (1 bit)
3690 * and imms (6 bits) as follows:
3691 * 64 bit elements: immn = 1, imms = <length of run - 1>
3692 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3693 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3694 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3695 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3696 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3697 * Notice that immn = 0, imms = 11111x is the only combination
3698 * not covered by one of the above options; this is reserved.
3699 * Further, <length of run - 1> all-ones is a reserved pattern.
3700 *
3701 * In all cases the rotation is by immr % e (and immr is 6 bits).
3702 */
3703
3704 /* First determine the element size */
3705 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3706 if (len < 1) {
3707 /* This is the immn == 0, imms == 11111x (binary) case */
3708 return false;
3709 }
3710 e = 1 << len;
3711
3712 levels = e - 1;
3713 s = imms & levels;
3714 r = immr & levels;
3715
3716 if (s == levels) {
3717 /* <length of run - 1> mustn't be all-ones. */
3718 return false;
3719 }
3720
3721 /* Create the value of one element: s+1 set bits rotated
3722 * by r within the element (which is e bits wide)...
3723 */
3724 mask = bitmask64(s + 1);
3725 if (r) {
3726 mask = (mask >> r) | (mask << (e - r));
3727 mask &= bitmask64(e);
3728 }
3729 /* ...then replicate the element over the whole 64 bit value */
3730 mask = bitfield_replicate(mask, e);
3731 *result = mask;
3732 return true;
3733 }
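
/*
 * Worked example, checked by hand against the function above (assumes
 * <assert.h>): the encoding N=0, immr=0, imms=0b111100 selects 2-bit
 * elements with a 1-bit run, producing the alternating pattern:
 *
 *   len = 31 - clz32(~0x3c & 0x3f) = 1, so e = 2, levels = 1
 *   s = 0, r = 0, mask = bitmask64(1) = 1
 *   replicated over 2-bit elements -> 0x5555555555555555
 */
#if 0
uint64_t wmask;
assert(logic_imm_decode_wmask(&wmask, 0, 0x3c, 0));
assert(wmask == 0x5555555555555555ull);
#endif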
3734
3735 /* Logical (immediate)
3736 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3737 * +----+-----+-------------+---+------+------+------+------+
3738 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
3739 * +----+-----+-------------+---+------+------+------+------+
3740 */
3741 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3742 {
3743 unsigned int sf, opc, is_n, immr, imms, rn, rd;
3744 TCGv_i64 tcg_rd, tcg_rn;
3745 uint64_t wmask;
3746 bool is_and = false;
3747
3748 sf = extract32(insn, 31, 1);
3749 opc = extract32(insn, 29, 2);
3750 is_n = extract32(insn, 22, 1);
3751 immr = extract32(insn, 16, 6);
3752 imms = extract32(insn, 10, 6);
3753 rn = extract32(insn, 5, 5);
3754 rd = extract32(insn, 0, 5);
3755
3756 if (!sf && is_n) {
3757 unallocated_encoding(s);
3758 return;
3759 }
3760
3761 if (opc == 0x3) { /* ANDS */
3762 tcg_rd = cpu_reg(s, rd);
3763 } else {
3764 tcg_rd = cpu_reg_sp(s, rd);
3765 }
3766 tcg_rn = cpu_reg(s, rn);
3767
3768 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3769 /* some immediate field values are reserved */
3770 unallocated_encoding(s);
3771 return;
3772 }
3773
3774 if (!sf) {
3775 wmask &= 0xffffffff;
3776 }
3777
3778 switch (opc) {
3779 case 0x3: /* ANDS */
3780 case 0x0: /* AND */
3781 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3782 is_and = true;
3783 break;
3784 case 0x1: /* ORR */
3785 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3786 break;
3787 case 0x2: /* EOR */
3788 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3789 break;
3790 default:
3791 g_assert_not_reached(); /* must handle all above */
3792 break;
3793 }
3794
3795 if (!sf && !is_and) {
3796 /* zero extend final result; we know we can skip this for AND
3797 * since the immediate had the high 32 bits clear.
3798 */
3799 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3800 }
3801
3802 if (opc == 3) { /* ANDS */
3803 gen_logic_CC(sf, tcg_rd);
3804 }
3805 }
3806
3807 /*
3808 * Move wide (immediate)
3809 *
3810 * 31 30 29 28 23 22 21 20 5 4 0
3811 * +--+-----+-------------+-----+----------------+------+
3812 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
3813 * +--+-----+-------------+-----+----------------+------+
3814 *
3815 * sf: 0 -> 32 bit, 1 -> 64 bit
3816 * opc: 00 -> N, 10 -> Z, 11 -> K
3817 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf=1)
3818 */
3819 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3820 {
3821 int rd = extract32(insn, 0, 5);
3822 uint64_t imm = extract32(insn, 5, 16);
3823 int sf = extract32(insn, 31, 1);
3824 int opc = extract32(insn, 29, 2);
3825 int pos = extract32(insn, 21, 2) << 4;
3826 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3827 TCGv_i64 tcg_imm;
3828
3829 if (!sf && (pos >= 32)) {
3830 unallocated_encoding(s);
3831 return;
3832 }
3833
3834 switch (opc) {
3835 case 0: /* MOVN */
3836 case 2: /* MOVZ */
3837 imm <<= pos;
3838 if (opc == 0) {
3839 imm = ~imm;
3840 }
3841 if (!sf) {
3842 imm &= 0xffffffffu;
3843 }
3844 tcg_gen_movi_i64(tcg_rd, imm);
3845 break;
3846 case 3: /* MOVK */
3847 tcg_imm = tcg_const_i64(imm);
3848 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3849 tcg_temp_free_i64(tcg_imm);
3850 if (!sf) {
3851 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3852 }
3853 break;
3854 default:
3855 unallocated_encoding(s);
3856 break;
3857 }
3858 }
3859
3860 /* Bitfield
3861 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3862 * +----+-----+-------------+---+------+------+------+------+
3863 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
3864 * +----+-----+-------------+---+------+------+------+------+
3865 */
3866 static void disas_bitfield(DisasContext *s, uint32_t insn)
3867 {
3868 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3869 TCGv_i64 tcg_rd, tcg_tmp;
3870
3871 sf = extract32(insn, 31, 1);
3872 opc = extract32(insn, 29, 2);
3873 n = extract32(insn, 22, 1);
3874 ri = extract32(insn, 16, 6);
3875 si = extract32(insn, 10, 6);
3876 rn = extract32(insn, 5, 5);
3877 rd = extract32(insn, 0, 5);
3878 bitsize = sf ? 64 : 32;
3879
3880 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3881 unallocated_encoding(s);
3882 return;
3883 }
3884
3885 tcg_rd = cpu_reg(s, rd);
3886
3887 /* Suppress the zero-extend for !sf. Since RI and SI are constrained
3888 to be smaller than bitsize, we'll never reference data outside the
3889 low 32-bits anyway. */
3890 tcg_tmp = read_cpu_reg(s, rn, 1);
3891
3892 /* Recognize simple(r) extractions. */
3893 if (si >= ri) {
3894 /* Wd<s-r:0> = Wn<s:r> */
3895 len = (si - ri) + 1;
3896 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3897 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3898 goto done;
3899 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3900 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3901 return;
3902 }
3903 /* opc == 1, BFXIL: fall through to the deposit below */
3904 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3905 pos = 0;
3906 } else {
3907 /* Handle the ri > si case with a deposit
3908 * Wd<32+s-r,32-r> = Wn<s:0>
3909 */
3910 len = si + 1;
3911 pos = (bitsize - ri) & (bitsize - 1);
3912 }
3913
3914 if (opc == 0 && len < ri) {
3915 /* SBFM: sign extend the destination field from len to fill
3916 the balance of the word. Let the deposit below insert all
3917 of those sign bits. */
3918 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3919 len = ri;
3920 }
3921
3922 if (opc == 1) { /* BFM, BFXIL */
3923 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3924 } else {
3925 /* SBFM or UBFM: We start with zero, and we haven't modified
3926 any bits outside bitsize, therefore the zero-extension
3927 below is unneeded. */
3928 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3929 return;
3930 }
3931
3932 done:
3933 if (!sf) { /* zero extend final result */
3934 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3935 }
3936 }
3937
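/*
 * Sketch of the two simple extractions recognised above, in the same
 * <ri, len> terms (hypothetical helpers, assume <stdint.h>): UBFM
 * places Rn<ri+len-1:ri> at bit 0 zero-extended; SBFM additionally
 * sign-extends from the top bit of the field.
 */
static uint64_t sketch_ubfx64(uint64_t rn, unsigned ri, unsigned len)
{
    uint64_t field_mask = len == 64 ? ~0ull : (1ull << len) - 1;
    return (rn >> ri) & field_mask;
}

static int64_t sketch_sbfx64(uint64_t rn, unsigned ri, unsigned len)
{
    /* Shift the field up to bit 63, then arithmetic-shift back down */
    return (int64_t)(rn << (64 - ri - len)) >> (64 - len);
}
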
3938 /* Extract
3939 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
3940 * +----+------+-------------+---+----+------+--------+------+------+
3941 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
3942 * +----+------+-------------+---+----+------+--------+------+------+
3943 */
3944 static void disas_extract(DisasContext *s, uint32_t insn)
3945 {
3946 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3947
3948 sf = extract32(insn, 31, 1);
3949 n = extract32(insn, 22, 1);
3950 rm = extract32(insn, 16, 5);
3951 imm = extract32(insn, 10, 6);
3952 rn = extract32(insn, 5, 5);
3953 rd = extract32(insn, 0, 5);
3954 op21 = extract32(insn, 29, 2);
3955 op0 = extract32(insn, 21, 1);
3956 bitsize = sf ? 64 : 32;
3957
3958 if (sf != n || op21 || op0 || imm >= bitsize) {
3959 unallocated_encoding(s);
3960 } else {
3961 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3962
3963 tcg_rd = cpu_reg(s, rd);
3964
3965 if (unlikely(imm == 0)) {
3966 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3967 * so an extract from bit 0 is a special case.
3968 */
3969 if (sf) {
3970 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3971 } else {
3972 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3973 }
3974 } else {
3975 tcg_rm = cpu_reg(s, rm);
3976 tcg_rn = cpu_reg(s, rn);
3977
3978 if (sf) {
3979 /* Specialization to ROR happens in EXTRACT2. */
3980 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
3981 } else {
3982 TCGv_i32 t0 = tcg_temp_new_i32();
3983
3984 tcg_gen_extrl_i64_i32(t0, tcg_rm);
3985 if (rm == rn) {
3986 tcg_gen_rotri_i32(t0, t0, imm);
3987 } else {
3988 TCGv_i32 t1 = tcg_temp_new_i32();
3989 tcg_gen_extrl_i64_i32(t1, tcg_rn);
3990 tcg_gen_extract2_i32(t0, t0, t1, imm);
3991 tcg_temp_free_i32(t1);
3992 }
3993 tcg_gen_extu_i32_i64(tcg_rd, t0);
3994 tcg_temp_free_i32(t0);
3995 }
3996 }
3997 }
3998 }
3999
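/*
 * Sketch of the 64-bit EXTR semantics above (hypothetical helper,
 * assumes <stdint.h>): the result is bits <imm+63:imm> of the Rn:Rm
 * concatenation, with Rm supplying the low half; Rm == Rn degenerates
 * to ROR, which is why the 32-bit path special-cases rm == rn.
 */
static uint64_t sketch_extr64(uint64_t rn, uint64_t rm, unsigned imm)
{
    if (imm == 0) {
        return rm;  /* avoid the undefined shift by 64 in C, as above */
    }
    return (rm >> imm) | (rn << (64 - imm));
}
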
4000 /* Data processing - immediate */
4001 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4002 {
4003 switch (extract32(insn, 23, 6)) {
4004 case 0x20: case 0x21: /* PC-rel. addressing */
4005 disas_pc_rel_adr(s, insn);
4006 break;
4007 case 0x22: case 0x23: /* Add/subtract (immediate) */
4008 disas_add_sub_imm(s, insn);
4009 break;
4010 case 0x24: /* Logical (immediate) */
4011 disas_logic_imm(s, insn);
4012 break;
4013 case 0x25: /* Move wide (immediate) */
4014 disas_movw_imm(s, insn);
4015 break;
4016 case 0x26: /* Bitfield */
4017 disas_bitfield(s, insn);
4018 break;
4019 case 0x27: /* Extract */
4020 disas_extract(s, insn);
4021 break;
4022 default:
4023 unallocated_encoding(s);
4024 break;
4025 }
4026 }
4027
4028 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4029 * Note that it is the caller's responsibility to ensure that the
4030 * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
4031 * ARM-mandated semantics for out-of-range shifts.
4032 */
4033 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4034 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4035 {
4036 switch (shift_type) {
4037 case A64_SHIFT_TYPE_LSL:
4038 tcg_gen_shl_i64(dst, src, shift_amount);
4039 break;
4040 case A64_SHIFT_TYPE_LSR:
4041 tcg_gen_shr_i64(dst, src, shift_amount);
4042 break;
4043 case A64_SHIFT_TYPE_ASR:
4044 if (!sf) {
4045 tcg_gen_ext32s_i64(dst, src);
4046 }
4047 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4048 break;
4049 case A64_SHIFT_TYPE_ROR:
4050 if (sf) {
4051 tcg_gen_rotr_i64(dst, src, shift_amount);
4052 } else {
4053 TCGv_i32 t0, t1;
4054 t0 = tcg_temp_new_i32();
4055 t1 = tcg_temp_new_i32();
4056 tcg_gen_extrl_i64_i32(t0, src);
4057 tcg_gen_extrl_i64_i32(t1, shift_amount);
4058 tcg_gen_rotr_i32(t0, t0, t1);
4059 tcg_gen_extu_i32_i64(dst, t0);
4060 tcg_temp_free_i32(t0);
4061 tcg_temp_free_i32(t1);
4062 }
4063 break;
4064 default:
4065 g_assert_not_reached(); /* all shift types are handled above */
4066 break;
4067 }
4068
4069 if (!sf) { /* zero extend final result */
4070 tcg_gen_ext32u_i64(dst, dst);
4071 }
4072 }
4073
4074 /* Shift a TCGv src by immediate, put result in dst.
4075 * The shift amount must be in range (this should always be true as the
4076 * relevant instructions will UNDEF on bad shift immediates).
4077 */
4078 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4079 enum a64_shift_type shift_type, unsigned int shift_i)
4080 {
4081 assert(shift_i < (sf ? 64 : 32));
4082
4083 if (shift_i == 0) {
4084 tcg_gen_mov_i64(dst, src);
4085 } else {
4086 TCGv_i64 shift_const;
4087
4088 shift_const = tcg_const_i64(shift_i);
4089 shift_reg(dst, src, sf, shift_type, shift_const);
4090 tcg_temp_free_i64(shift_const);
4091 }
4092 }
4093
4094 /* Logical (shifted register)
4095 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4096 * +----+-----+-----------+-------+---+------+--------+------+------+
4097 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
4098 * +----+-----+-----------+-------+---+------+--------+------+------+
4099 */
4100 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4101 {
4102 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4103 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4104
4105 sf = extract32(insn, 31, 1);
4106 opc = extract32(insn, 29, 2);
4107 shift_type = extract32(insn, 22, 2);
4108 invert = extract32(insn, 21, 1);
4109 rm = extract32(insn, 16, 5);
4110 shift_amount = extract32(insn, 10, 6);
4111 rn = extract32(insn, 5, 5);
4112 rd = extract32(insn, 0, 5);
4113
4114 if (!sf && (shift_amount & (1 << 5))) {
4115 unallocated_encoding(s);
4116 return;
4117 }
4118
4119 tcg_rd = cpu_reg(s, rd);
4120
4121 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4122 /* Unshifted ORR and ORN with WZR/XZR are the standard encodings for
4123 * register-register MOV and MVN, so they are worth special-casing.
4124 */
4125 tcg_rm = cpu_reg(s, rm);
4126 if (invert) {
4127 tcg_gen_not_i64(tcg_rd, tcg_rm);
4128 if (!sf) {
4129 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4130 }
4131 } else {
4132 if (sf) {
4133 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4134 } else {
4135 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4136 }
4137 }
4138 return;
4139 }
4140
4141 tcg_rm = read_cpu_reg(s, rm, sf);
4142
4143 if (shift_amount) {
4144 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4145 }
4146
4147 tcg_rn = cpu_reg(s, rn);
4148
4149 switch (opc | (invert << 2)) {
4150 case 0: /* AND */
4151 case 3: /* ANDS */
4152 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4153 break;
4154 case 1: /* ORR */
4155 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4156 break;
4157 case 2: /* EOR */
4158 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4159 break;
4160 case 4: /* BIC */
4161 case 7: /* BICS */
4162 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4163 break;
4164 case 5: /* ORN */
4165 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4166 break;
4167 case 6: /* EON */
4168 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4169 break;
4170 default:
4171 g_assert_not_reached(); /* all opc|invert combinations handled */
4172 break;
4173 }
4174
4175 if (!sf) {
4176 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4177 }
4178
4179 if (opc == 3) {
4180 gen_logic_CC(sf, tcg_rd);
4181 }
4182 }
4183
4184 /*
4185 * Add/subtract (extended register)
4186 *
4187 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
4188 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4189 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
4190 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4191 *
4192 * sf: 0 -> 32bit, 1 -> 64bit
4193 * op: 0 -> add , 1 -> sub
4194 * S: 1 -> set flags
4195 * opt: must be 00 (all other values are unallocated)
4196 * option: extension type (see DecodeRegExtend)
4197 * imm3: left-shift amount applied to the extended Rm (0..4)
4198 *
4199 * Rd = Rn + LSL(extend(Rm), amount)
4200 */
4201 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4202 {
4203 int rd = extract32(insn, 0, 5);
4204 int rn = extract32(insn, 5, 5);
4205 int imm3 = extract32(insn, 10, 3);
4206 int option = extract32(insn, 13, 3);
4207 int rm = extract32(insn, 16, 5);
4208 int opt = extract32(insn, 22, 2);
4209 bool setflags = extract32(insn, 29, 1);
4210 bool sub_op = extract32(insn, 30, 1);
4211 bool sf = extract32(insn, 31, 1);
4212
4213 TCGv_i64 tcg_rm, tcg_rn; /* temps */
4214 TCGv_i64 tcg_rd;
4215 TCGv_i64 tcg_result;
4216
4217 if (imm3 > 4 || opt != 0) {
4218 unallocated_encoding(s);
4219 return;
4220 }
4221
4222 /* non-flag setting ops may use SP */
4223 if (!setflags) {
4224 tcg_rd = cpu_reg_sp(s, rd);
4225 } else {
4226 tcg_rd = cpu_reg(s, rd);
4227 }
4228 tcg_rn = read_cpu_reg_sp(s, rn, sf);
4229
4230 tcg_rm = read_cpu_reg(s, rm, sf);
4231 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4232
4233 tcg_result = tcg_temp_new_i64();
4234
4235 if (!setflags) {
4236 if (sub_op) {
4237 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4238 } else {
4239 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4240 }
4241 } else {
4242 if (sub_op) {
4243 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4244 } else {
4245 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4246 }
4247 }
4248
4249 if (sf) {
4250 tcg_gen_mov_i64(tcg_rd, tcg_result);
4251 } else {
4252 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4253 }
4254
4255 tcg_temp_free_i64(tcg_result);
4256 }
4257
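/*
 * Sketch of what ext_and_shift_reg() (defined earlier in this file)
 * does to the Rm operand: extend a sub-width value according to
 * "option", then shift left by imm3.  Hypothetical host-side helper;
 * assumes <stdint.h>/<stdbool.h>.
 */
static uint64_t sketch_ext_and_shift(uint64_t rm, unsigned option,
                                     unsigned imm3)
{
    bool is_signed = option & 4;          /* SXTB/SXTH/SXTW/SXTX */
    unsigned bits = 8 << (option & 3);    /* 8, 16, 32 or 64 */

    if (bits < 64) {
        if (is_signed) {
            rm = (uint64_t)((int64_t)(rm << (64 - bits)) >> (64 - bits));
        } else {
            rm &= (1ull << bits) - 1;
        }
    }
    return rm << imm3;                    /* imm3 is 0..4 */
}
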
4258 /*
4259 * Add/subtract (shifted register)
4260 *
4261 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4262 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4263 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
4264 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4265 *
4266 * sf: 0 -> 32bit, 1 -> 64bit
4267 * op: 0 -> add , 1 -> sub
4268 * S: 1 -> set flags
4269 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4270 * imm6: Shift amount to apply to Rm before the add/sub
4271 */
4272 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4273 {
4274 int rd = extract32(insn, 0, 5);
4275 int rn = extract32(insn, 5, 5);
4276 int imm6 = extract32(insn, 10, 6);
4277 int rm = extract32(insn, 16, 5);
4278 int shift_type = extract32(insn, 22, 2);
4279 bool setflags = extract32(insn, 29, 1);
4280 bool sub_op = extract32(insn, 30, 1);
4281 bool sf = extract32(insn, 31, 1);
4282
4283 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4284 TCGv_i64 tcg_rn, tcg_rm;
4285 TCGv_i64 tcg_result;
4286
4287 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4288 unallocated_encoding(s);
4289 return;
4290 }
4291
4292 tcg_rn = read_cpu_reg(s, rn, sf);
4293 tcg_rm = read_cpu_reg(s, rm, sf);
4294
4295 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4296
4297 tcg_result = tcg_temp_new_i64();
4298
4299 if (!setflags) {
4300 if (sub_op) {
4301 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4302 } else {
4303 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4304 }
4305 } else {
4306 if (sub_op) {
4307 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4308 } else {
4309 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4310 }
4311 }
4312
4313 if (sf) {
4314 tcg_gen_mov_i64(tcg_rd, tcg_result);
4315 } else {
4316 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4317 }
4318
4319 tcg_temp_free_i64(tcg_result);
4320 }
4321
4322 /* Data-processing (3 source)
4323 *
4324 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
4325 * +--+------+-----------+------+------+----+------+------+------+
4326 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
4327 * +--+------+-----------+------+------+----+------+------+------+
4328 */
4329 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4330 {
4331 int rd = extract32(insn, 0, 5);
4332 int rn = extract32(insn, 5, 5);
4333 int ra = extract32(insn, 10, 5);
4334 int rm = extract32(insn, 16, 5);
4335 int op_id = (extract32(insn, 29, 3) << 4) |
4336 (extract32(insn, 21, 3) << 1) |
4337 extract32(insn, 15, 1);
4338 bool sf = extract32(insn, 31, 1);
4339 bool is_sub = extract32(op_id, 0, 1);
4340 bool is_high = extract32(op_id, 2, 1);
4341 bool is_signed = false;
4342 TCGv_i64 tcg_op1;
4343 TCGv_i64 tcg_op2;
4344 TCGv_i64 tcg_tmp;
4345
4346 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4347 switch (op_id) {
4348 case 0x42: /* SMADDL */
4349 case 0x43: /* SMSUBL */
4350 case 0x44: /* SMULH */
4351 is_signed = true;
4352 break;
4353 case 0x0: /* MADD (32bit) */
4354 case 0x1: /* MSUB (32bit) */
4355 case 0x40: /* MADD (64bit) */
4356 case 0x41: /* MSUB (64bit) */
4357 case 0x4a: /* UMADDL */
4358 case 0x4b: /* UMSUBL */
4359 case 0x4c: /* UMULH */
4360 break;
4361 default:
4362 unallocated_encoding(s);
4363 return;
4364 }
4365
4366 if (is_high) {
4367 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4368 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4369 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4370 TCGv_i64 tcg_rm = cpu_reg(s, rm);
4371
4372 if (is_signed) {
4373 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4374 } else {
4375 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4376 }
4377
4378 tcg_temp_free_i64(low_bits);
4379 return;
4380 }
4381
4382 tcg_op1 = tcg_temp_new_i64();
4383 tcg_op2 = tcg_temp_new_i64();
4384 tcg_tmp = tcg_temp_new_i64();
4385
4386 if (op_id < 0x42) {
4387 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4388 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4389 } else {
4390 if (is_signed) {
4391 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4392 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4393 } else {
4394 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4395 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4396 }
4397 }
4398
4399 if (ra == 31 && !is_sub) {
4400 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4401 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4402 } else {
4403 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4404 if (is_sub) {
4405 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4406 } else {
4407 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4408 }
4409 }
4410
4411 if (!sf) {
4412 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4413 }
4414
4415 tcg_temp_free_i64(tcg_op1);
4416 tcg_temp_free_i64(tcg_op2);
4417 tcg_temp_free_i64(tcg_tmp);
4418 }
4419
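/*
 * Sketch of the upper-half multiplies generated above via
 * tcg_gen_muls2/mulu2, here using the GCC/Clang __int128 extension
 * (hypothetical helpers; assume <stdint.h>).
 */
static uint64_t sketch_umulh(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static int64_t sketch_smulh(int64_t a, int64_t b)
{
    return (int64_t)(((__int128)a * b) >> 64);
}
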
4420 /* Add/subtract (with carry)
4421 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
4422 * +--+--+--+------------------------+------+-------------+------+-----+
4423 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
4424 * +--+--+--+------------------------+------+-------------+------+-----+
4425 */
4426
4427 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4428 {
4429 unsigned int sf, op, setflags, rm, rn, rd;
4430 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4431
4432 sf = extract32(insn, 31, 1);
4433 op = extract32(insn, 30, 1);
4434 setflags = extract32(insn, 29, 1);
4435 rm = extract32(insn, 16, 5);
4436 rn = extract32(insn, 5, 5);
4437 rd = extract32(insn, 0, 5);
4438
4439 tcg_rd = cpu_reg(s, rd);
4440 tcg_rn = cpu_reg(s, rn);
4441
4442 if (op) {
4443 tcg_y = new_tmp_a64(s);
4444 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4445 } else {
4446 tcg_y = cpu_reg(s, rm);
4447 }
4448
4449 if (setflags) {
4450 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4451 } else {
4452 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4453 }
4454 }
4455
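/*
 * Sketch of the A64 carry convention used above (hypothetical helper,
 * assumes <stdint.h>/<stdbool.h>): SBC is ADC with an inverted
 * operand, i.e. rn - rm - !C == rn + ~rm + C, so "borrow" means the
 * carry flag is clear.
 */
static uint64_t sketch_adc_sbc(uint64_t rn, uint64_t rm,
                               bool carry_in, bool is_sub)
{
    uint64_t y = is_sub ? ~rm : rm;
    return rn + y + (carry_in ? 1 : 0);
}
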
4456 /*
4457 * Rotate right into flags
4458 * 31 30 29 21 15 10 5 4 0
4459 * +--+--+--+-----------------+--------+-----------+------+--+------+
4460 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
4461 * +--+--+--+-----------------+--------+-----------+------+--+------+
4462 */
4463 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4464 {
4465 int mask = extract32(insn, 0, 4);
4466 int o2 = extract32(insn, 4, 1);
4467 int rn = extract32(insn, 5, 5);
4468 int imm6 = extract32(insn, 15, 6);
4469 int sf_op_s = extract32(insn, 29, 3);
4470 TCGv_i64 tcg_rn;
4471 TCGv_i32 nzcv;
4472
4473 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4474 unallocated_encoding(s);
4475 return;
4476 }
4477
4478 tcg_rn = read_cpu_reg(s, rn, 1);
4479 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4480
4481 nzcv = tcg_temp_new_i32();
4482 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4483
4484 if (mask & 8) { /* N */
4485 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4486 }
4487 if (mask & 4) { /* Z */
4488 tcg_gen_not_i32(cpu_ZF, nzcv);
4489 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4490 }
4491 if (mask & 2) { /* C */
4492 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4493 }
4494 if (mask & 1) { /* V */
4495 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4496 }
4497
4498 tcg_temp_free_i32(nzcv);
4499 }
4500
4501 /*
4502 * Evaluate into flags
4503 * 31 30 29 21 15 14 10 5 4 0
4504 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4505 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
4506 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4507 */
4508 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4509 {
4510 int o3_mask = extract32(insn, 0, 5);
4511 int rn = extract32(insn, 5, 5);
4512 int o2 = extract32(insn, 15, 6);
4513 int sz = extract32(insn, 14, 1);
4514 int sf_op_s = extract32(insn, 29, 3);
4515 TCGv_i32 tmp;
4516 int shift;
4517
4518 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4519 !dc_isar_feature(aa64_condm_4, s)) {
4520 unallocated_encoding(s);
4521 return;
4522 }
4523 shift = sz ? 16 : 24; /* SETF16 or SETF8 */
4524
4525 tmp = tcg_temp_new_i32();
4526 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4527 tcg_gen_shli_i32(cpu_NF, tmp, shift);
4528 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4529 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4530 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4531 tcg_temp_free_i32(tmp);
4532 }
4533
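/*
 * Sketch of the SETF8 flag computation above in architectural terms.
 * QEMU stores N in bit 31 of cpu_NF, Z as "cpu_ZF == 0" and V in bit
 * 31 of cpu_VF, which is why the code above needs only two shifts, a
 * move and an XOR.  Hypothetical helper; assumes <stdint.h>/<stdbool.h>.
 */
static void sketch_setf8(uint32_t w, bool *n, bool *z, bool *v)
{
    *n = (w >> 7) & 1;                /* sign bit of the byte */
    *z = (uint8_t)w == 0;             /* the byte is zero */
    *v = ((w >> 8) ^ (w >> 7)) & 1;   /* carry-out differs from sign */
    /* C is left unchanged */
}
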
4534 /* Conditional compare (immediate / register)
4535 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4536 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4537 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
4538 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4539 * [1] y [0] [0]
4540 */
4541 static void disas_cc(DisasContext *s, uint32_t insn)
4542 {
4543 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4544 TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4545 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4546 DisasCompare c;
4547
4548 if (!extract32(insn, 29, 1)) {
4549 unallocated_encoding(s);
4550 return;
4551 }
4552 if (insn & (1 << 10 | 1 << 4)) {
4553 unallocated_encoding(s);
4554 return;
4555 }
4556 sf = extract32(insn, 31, 1);
4557 op = extract32(insn, 30, 1);
4558 is_imm = extract32(insn, 11, 1);
4559 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4560 cond = extract32(insn, 12, 4);
4561 rn = extract32(insn, 5, 5);
4562 nzcv = extract32(insn, 0, 4);
4563
4564 /* Set T0 = !COND. */
4565 tcg_t0 = tcg_temp_new_i32();
4566 arm_test_cc(&c, cond);
4567 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4568 arm_free_cc(&c);
4569
4570 /* Load the arguments for the new comparison. */
4571 if (is_imm) {
4572 tcg_y = new_tmp_a64(s);
4573 tcg_gen_movi_i64(tcg_y, y);
4574 } else {
4575 tcg_y = cpu_reg(s, y);
4576 }
4577 tcg_rn = cpu_reg(s, rn);
4578
4579 /* Set the flags for the new comparison. */
4580 tcg_tmp = tcg_temp_new_i64();
4581 if (op) {
4582 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4583 } else {
4584 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4585 }
4586 tcg_temp_free_i64(tcg_tmp);
4587
4588 /* If COND was false, force the flags to #nzcv. Compute two masks
4589 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4590 * For tcg hosts that support ANDC, we can make do with just T1.
4591 * In either case, allow the tcg optimizer to delete any unused mask.
4592 */
4593 tcg_t1 = tcg_temp_new_i32();
4594 tcg_t2 = tcg_temp_new_i32();
4595 tcg_gen_neg_i32(tcg_t1, tcg_t0);
4596 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4597
4598 if (nzcv & 8) { /* N */
4599 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4600 } else {
4601 if (TCG_TARGET_HAS_andc_i32) {
4602 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4603 } else {
4604 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4605 }
4606 }
4607 if (nzcv & 4) { /* Z */
4608 if (TCG_TARGET_HAS_andc_i32) {
4609 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4610 } else {
4611 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4612 }
4613 } else {
4614 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4615 }
4616 if (nzcv & 2) { /* C */
4617 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4618 } else {
4619 if (TCG_TARGET_HAS_andc_i32) {
4620 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4621 } else {
4622 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4623 }
4624 }
4625 if (nzcv & 1) { /* V */
4626 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4627 } else {
4628 if (TCG_TARGET_HAS_andc_i32) {
4629 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4630 } else {
4631 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4632 }
4633 }
4634 tcg_temp_free_i32(tcg_t0);
4635 tcg_temp_free_i32(tcg_t1);
4636 tcg_temp_free_i32(tcg_t2);
4637 }
4638
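/*
 * Sketch of the branch-free select that the T1/T2 mask trick above
 * implements for each flag of CCMP/CCMN (hypothetical helper): with
 * t1 = (COND ? -1 : 0), the flag produced by the comparison is kept
 * when the condition held, and the immediate #nzcv bit is forced in
 * otherwise, all without a branch.
 */
static uint32_t sketch_ccmp_flag(bool cond_holds, uint32_t flag_from_cmp,
                                 uint32_t flag_from_imm)
{
    uint32_t t1 = cond_holds ? -1u : 0;
    return (flag_from_cmp & t1) | (flag_from_imm & ~t1);
}
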
4639 /* Conditional select
4640 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
4641 * +----+----+---+-----------------+------+------+-----+------+------+
4642 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
4643 * +----+----+---+-----------------+------+------+-----+------+------+
4644 */
4645 static void disas_cond_select(DisasContext *s, uint32_t insn)
4646 {
4647 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4648 TCGv_i64 tcg_rd, zero;
4649 DisasCompare64 c;
4650
4651 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4652 /* S == 1 or op2<1> == 1 */
4653 unallocated_encoding(s);
4654 return;
4655 }
4656 sf = extract32(insn, 31, 1);
4657 else_inv = extract32(insn, 30, 1);
4658 rm = extract32(insn, 16, 5);
4659 cond = extract32(insn, 12, 4);
4660 else_inc = extract32(insn, 10, 1);
4661 rn = extract32(insn, 5, 5);
4662 rd = extract32(insn, 0, 5);
4663
4664 tcg_rd = cpu_reg(s, rd);
4665
4666 a64_test_cc(&c, cond);
4667 zero = tcg_const_i64(0);
4668
4669 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4670 /* CSET & CSETM. */
4671 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4672 if (else_inv) {
4673 tcg_gen_neg_i64(tcg_rd, tcg_rd);
4674 }
4675 } else {
4676 TCGv_i64 t_true = cpu_reg(s, rn);
4677 TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4678 if (else_inv && else_inc) {
4679 tcg_gen_neg_i64(t_false, t_false);
4680 } else if (else_inv) {
4681 tcg_gen_not_i64(t_false, t_false);
4682 } else if (else_inc) {
4683 tcg_gen_addi_i64(t_false, t_false, 1);
4684 }
4685 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4686 }
4687
4688 tcg_temp_free_i64(zero);
4689 a64_free_cc(&c);
4690
4691 if (!sf) {
4692 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4693 }
4694 }
4695
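/*
 * Sketch of the conditional-select family decoded above (hypothetical
 * helper, assumes <stdint.h>/<stdbool.h>): the op bit (else_inv) and
 * op2<0> (else_inc) pick between CSEL, CSINC, CSINV and CSNEG.
 */
static uint64_t sketch_cond_select(bool cond_holds, uint64_t rn,
                                   uint64_t rm, bool else_inv,
                                   bool else_inc)
{
    if (cond_holds) {
        return rn;
    }
    if (else_inv && else_inc) {
        return -rm;          /* CSNEG */
    } else if (else_inv) {
        return ~rm;          /* CSINV */
    } else if (else_inc) {
        return rm + 1;       /* CSINC */
    }
    return rm;               /* CSEL */
}
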
4696 static void handle_clz(DisasContext *s, unsigned int sf,
4697 unsigned int rn, unsigned int rd)
4698 {
4699 TCGv_i64 tcg_rd, tcg_rn;
4700 tcg_rd = cpu_reg(s, rd);
4701 tcg_rn = cpu_reg(s, rn);
4702
4703 if (sf) {
4704 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4705 } else {
4706 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4707 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4708 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4709 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4710 tcg_temp_free_i32(tcg_tmp32);
4711 }
4712 }
4713
4714 static void handle_cls(DisasContext *s, unsigned int sf,
4715 unsigned int rn, unsigned int rd)
4716 {
4717 TCGv_i64 tcg_rd, tcg_rn;
4718 tcg_rd = cpu_reg(s, rd);
4719 tcg_rn = cpu_reg(s, rn);
4720
4721 if (sf) {
4722 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4723 } else {
4724 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4725 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4726 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4727 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4728 tcg_temp_free_i32(tcg_tmp32);
4729 }
4730 }
4731
4732 static void handle_rbit(DisasContext *s, unsigned int sf,
4733 unsigned int rn, unsigned int rd)
4734 {
4735 TCGv_i64 tcg_rd, tcg_rn;
4736 tcg_rd = cpu_reg(s, rd);
4737 tcg_rn = cpu_reg(s, rn);
4738
4739 if (sf) {
4740 gen_helper_rbit64(tcg_rd, tcg_rn);
4741 } else {
4742 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4743 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4744 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4745 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4746 tcg_temp_free_i32(tcg_tmp32);
4747 }
4748 }
4749
4750 /* REV with sf==1, opcode==3 ("REV64") */
4751 static void handle_rev64(DisasContext *s, unsigned int sf,
4752 unsigned int rn, unsigned int rd)
4753 {
4754 if (!sf) {
4755 unallocated_encoding(s);
4756 return;
4757 }
4758 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4759 }
4760
4761 /* REV (sf==0, opcode==2): byte-reverse the 32-bit value
4762 * REV32 (sf==1, opcode==2): byte-reverse each 32-bit half
4763 */
4764 static void handle_rev32(DisasContext *s, unsigned int sf,
4765 unsigned int rn, unsigned int rd)
4766 {
4767 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4768
4769 if (sf) {
4770 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4771 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4772
4773 /* bswap32_i64 requires zero high word */
4774 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4775 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4776 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4777 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4778 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4779
4780 tcg_temp_free_i64(tcg_tmp);
4781 } else {
4782 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4783 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4784 }
4785 }
4786
4787 /* REV16 (opcode==1) */
4788 static void handle_rev16(DisasContext *s, unsigned int sf,
4789 unsigned int rn, unsigned int rd)
4790 {
4791 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4792 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4793 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4794 TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4795
4796 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4797 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4798 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4799 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4800 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4801
4802 tcg_temp_free_i64(mask);
4803 tcg_temp_free_i64(tcg_tmp);
4804 }
4805
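/*
 * Sketch of the mask-and-shift sequence generated above: REV16 swaps
 * the two bytes within each 16-bit lane, so a handful of TCG ops cover
 * all four lanes at once.  Hypothetical helper; assumes <stdint.h>.
 */
static uint64_t sketch_rev16(uint64_t x)
{
    const uint64_t m = 0x00ff00ff00ff00ffull;
    return ((x & m) << 8) | ((x >> 8) & m);
}
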
4806 /* Data-processing (1 source)
4807 * 31 30 29 28 21 20 16 15 10 9 5 4 0
4808 * +----+---+---+-----------------+---------+--------+------+------+
4809 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
4810 * +----+---+---+-----------------+---------+--------+------+------+
4811 */
4812 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4813 {
4814 unsigned int sf, opcode, opcode2, rn, rd;
4815 TCGv_i64 tcg_rd;
4816
4817 if (extract32(insn, 29, 1)) {
4818 unallocated_encoding(s);
4819 return;
4820 }
4821
4822 sf = extract32(insn, 31, 1);
4823 opcode = extract32(insn, 10, 6);
4824 opcode2 = extract32(insn, 16, 5);
4825 rn = extract32(insn, 5, 5);
4826 rd = extract32(insn, 0, 5);
4827
4828 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
4829
4830 switch (MAP(sf, opcode2, opcode)) {
4831 case MAP(0, 0x00, 0x00): /* RBIT */
4832 case MAP(1, 0x00, 0x00):
4833 handle_rbit(s, sf, rn, rd);
4834 break;
4835 case MAP(0, 0x00, 0x01): /* REV16 */
4836 case MAP(1, 0x00, 0x01):
4837 handle_rev16(s, sf, rn, rd);
4838 break;
4839 case MAP(0, 0x00, 0x02): /* REV/REV32 */
4840 case MAP(1, 0x00, 0x02):
4841 handle_rev32(s, sf, rn, rd);
4842 break;
4843 case MAP(1, 0x00, 0x03): /* REV64 */
4844 handle_rev64(s, sf, rn, rd);
4845 break;
4846 case MAP(0, 0x00, 0x04): /* CLZ */
4847 case MAP(1, 0x00, 0x04):
4848 handle_clz(s, sf, rn, rd);
4849 break;
4850 case MAP(0, 0x00, 0x05): /* CLS */
4851 case MAP(1, 0x00, 0x05):
4852 handle_cls(s, sf, rn, rd);
4853 break;
4854 case MAP(1, 0x01, 0x00): /* PACIA */
4855 if (s->pauth_active) {
4856 tcg_rd = cpu_reg(s, rd);
4857 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4858 } else if (!dc_isar_feature(aa64_pauth, s)) {
4859 goto do_unallocated;
4860 }
4861 break;
4862 case MAP(1, 0x01, 0x01): /* PACIB */
4863 if (s->pauth_active) {
4864 tcg_rd = cpu_reg(s, rd);
4865 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4866 } else if (!dc_isar_feature(aa64_pauth, s)) {
4867 goto do_unallocated;
4868 }
4869 break;
4870 case MAP(1, 0x01, 0x02): /* PACDA */
4871 if (s->pauth_active) {
4872 tcg_rd = cpu_reg(s, rd);
4873 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4874 } else if (!dc_isar_feature(aa64_pauth, s)) {
4875 goto do_unallocated;
4876 }
4877 break;
4878 case MAP(1, 0x01, 0x03): /* PACDB */
4879 if (s->pauth_active) {
4880 tcg_rd = cpu_reg(s, rd);
4881 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4882 } else if (!dc_isar_feature(aa64_pauth, s)) {
4883 goto do_unallocated;
4884 }
4885 break;
4886 case MAP(1, 0x01, 0x04): /* AUTIA */
4887 if (s->pauth_active) {
4888 tcg_rd = cpu_reg(s, rd);
4889 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4890 } else if (!dc_isar_feature(aa64_pauth, s)) {
4891 goto do_unallocated;
4892 }
4893 break;
4894 case MAP(1, 0x01, 0x05): /* AUTIB */
4895 if (s->pauth_active) {
4896 tcg_rd = cpu_reg(s, rd);
4897 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4898 } else if (!dc_isar_feature(aa64_pauth, s)) {
4899 goto do_unallocated;
4900 }
4901 break;
4902 case MAP(1, 0x01, 0x06): /* AUTDA */
4903 if (s->pauth_active) {
4904 tcg_rd = cpu_reg(s, rd);
4905 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4906 } else if (!dc_isar_feature(aa64_pauth, s)) {
4907 goto do_unallocated;
4908 }
4909 break;
4910 case MAP(1, 0x01, 0x07): /* AUTDB */
4911 if (s->pauth_active) {
4912 tcg_rd = cpu_reg(s, rd);
4913 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4914 } else if (!dc_isar_feature(aa64_pauth, s)) {
4915 goto do_unallocated;
4916 }
4917 break;
4918 case MAP(1, 0x01, 0x08): /* PACIZA */
4919 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4920 goto do_unallocated;
4921 } else if (s->pauth_active) {
4922 tcg_rd = cpu_reg(s, rd);
4923 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4924 }
4925 break;
4926 case MAP(1, 0x01, 0x09): /* PACIZB */
4927 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4928 goto do_unallocated;
4929 } else if (s->pauth_active) {
4930 tcg_rd = cpu_reg(s, rd);
4931 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4932 }
4933 break;
4934 case MAP(1, 0x01, 0x0a): /* PACDZA */
4935 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4936 goto do_unallocated;
4937 } else if (s->pauth_active) {
4938 tcg_rd = cpu_reg(s, rd);
4939 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4940 }
4941 break;
4942 case MAP(1, 0x01, 0x0b): /* PACDZB */
4943 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4944 goto do_unallocated;
4945 } else if (s->pauth_active) {
4946 tcg_rd = cpu_reg(s, rd);
4947 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4948 }
4949 break;
4950 case MAP(1, 0x01, 0x0c): /* AUTIZA */
4951 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4952 goto do_unallocated;
4953 } else if (s->pauth_active) {
4954 tcg_rd = cpu_reg(s, rd);
4955 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4956 }
4957 break;
4958 case MAP(1, 0x01, 0x0d): /* AUTIZB */
4959 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4960 goto do_unallocated;
4961 } else if (s->pauth_active) {
4962 tcg_rd = cpu_reg(s, rd);
4963 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4964 }
4965 break;
4966 case MAP(1, 0x01, 0x0e): /* AUTDZA */
4967 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4968 goto do_unallocated;
4969 } else if (s->pauth_active) {
4970 tcg_rd = cpu_reg(s, rd);
4971 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4972 }
4973 break;
4974 case MAP(1, 0x01, 0x0f): /* AUTDZB */
4975 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4976 goto do_unallocated;
4977 } else if (s->pauth_active) {
4978 tcg_rd = cpu_reg(s, rd);
4979 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4980 }
4981 break;
4982 case MAP(1, 0x01, 0x10): /* XPACI */
4983 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4984 goto do_unallocated;
4985 } else if (s->pauth_active) {
4986 tcg_rd = cpu_reg(s, rd);
4987 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
4988 }
4989 break;
4990 case MAP(1, 0x01, 0x11): /* XPACD */
4991 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4992 goto do_unallocated;
4993 } else if (s->pauth_active) {
4994 tcg_rd = cpu_reg(s, rd);
4995 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
4996 }
4997 break;
4998 default:
4999 do_unallocated:
5000 unallocated_encoding(s);
5001 break;
5002 }
5003
5004 #undef MAP
5005 }
5006
5007 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5008 unsigned int rm, unsigned int rn, unsigned int rd)
5009 {
5010 TCGv_i64 tcg_n, tcg_m, tcg_rd;
5011 tcg_rd = cpu_reg(s, rd);
5012
5013 if (!sf && is_signed) {
5014 tcg_n = new_tmp_a64(s);
5015 tcg_m = new_tmp_a64(s);
5016 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5017 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5018 } else {
5019 tcg_n = read_cpu_reg(s, rn, sf);
5020 tcg_m = read_cpu_reg(s, rm, sf);
5021 }
5022
5023 if (is_signed) {
5024 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5025 } else {
5026 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5027 }
5028
5029 if (!sf) { /* zero extend final result */
5030 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5031 }
5032 }
5033
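/*
 * Sketch of the edge cases the sdiv64/udiv64 helpers must provide
 * (AArch64 division never traps): divide-by-zero yields 0 and the one
 * signed-overflow case wraps.  Hypothetical helper; assumes <stdint.h>.
 */
static int64_t sketch_sdiv64(int64_t n, int64_t m)
{
    if (m == 0) {
        return 0;                /* architectural result, no exception */
    }
    if (n == INT64_MIN && m == -1) {
        return INT64_MIN;        /* would overflow plain C division */
    }
    return n / m;
}
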
5034 /* LSLV, LSRV, ASRV, RORV */
5035 static void handle_shift_reg(DisasContext *s,
5036 enum a64_shift_type shift_type, unsigned int sf,
5037 unsigned int rm, unsigned int rn, unsigned int rd)
5038 {
5039 TCGv_i64 tcg_shift = tcg_temp_new_i64();
5040 TCGv_i64 tcg_rd = cpu_reg(s, rd);
5041 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5042
5043 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5044 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5045 tcg_temp_free_i64(tcg_shift);
5046 }
5047
5048 /* CRC32[BHWX], CRC32C[BHWX] */
5049 static void handle_crc32(DisasContext *s,
5050 unsigned int sf, unsigned int sz, bool crc32c,
5051 unsigned int rm, unsigned int rn, unsigned int rd)
5052 {
5053 TCGv_i64 tcg_acc, tcg_val;
5054 TCGv_i32 tcg_bytes;
5055
5056 if (!dc_isar_feature(aa64_crc32, s)
5057 || (sf == 1 && sz != 3)
5058 || (sf == 0 && sz == 3)) {
5059 unallocated_encoding(s);
5060 return;
5061 }
5062
5063 if (sz == 3) {
5064 tcg_val = cpu_reg(s, rm);
5065 } else {
5066 uint64_t mask;
5067 switch (sz) {
5068 case 0:
5069 mask = 0xFF;
5070 break;
5071 case 1:
5072 mask = 0xFFFF;
5073 break;
5074 case 2:
5075 mask = 0xFFFFFFFF;
5076 break;
5077 default:
5078 g_assert_not_reached();
5079 }
5080 tcg_val = new_tmp_a64(s);
5081 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5082 }
5083
5084 tcg_acc = cpu_reg(s, rn);
5085 tcg_bytes = tcg_const_i32(1 << sz);
5086
5087 if (crc32c) {
5088 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5089 } else {
5090 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5091 }
5092
5093 tcg_temp_free_i32(tcg_bytes);
5094 }
5095
5096 /* Data-processing (2 source)
5097 * 31 30 29 28 21 20 16 15 10 9 5 4 0
5098 * +----+---+---+-----------------+------+--------+------+------+
5099 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
5100 * +----+---+---+-----------------+------+--------+------+------+
5101 */
5102 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5103 {
5104 unsigned int sf, rm, opcode, rn, rd;
5105 sf = extract32(insn, 31, 1);
5106 rm = extract32(insn, 16, 5);
5107 opcode = extract32(insn, 10, 6);
5108 rn = extract32(insn, 5, 5);
5109 rd = extract32(insn, 0, 5);
5110
5111 if (extract32(insn, 29, 1)) {
5112 unallocated_encoding(s);
5113 return;
5114 }
5115
5116 switch (opcode) {
5117 case 2: /* UDIV */
5118 handle_div(s, false, sf, rm, rn, rd);
5119 break;
5120 case 3: /* SDIV */
5121 handle_div(s, true, sf, rm, rn, rd);
5122 break;
5123 case 8: /* LSLV */
5124 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5125 break;
5126 case 9: /* LSRV */
5127 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5128 break;
5129 case 10: /* ASRV */
5130 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5131 break;
5132 case 11: /* RORV */
5133 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5134 break;
5135 case 12: /* PACGA */
5136 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5137 goto do_unallocated;
5138 }
5139 gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5140 cpu_reg(s, rn), cpu_reg_sp(s, rm));
5141 break;
5142 case 16:
5143 case 17:
5144 case 18:
5145 case 19:
5146 case 20:
5147 case 21:
5148 case 22:
5149 case 23: /* CRC32 */
5150 {
5151 int sz = extract32(opcode, 0, 2);
5152 bool crc32c = extract32(opcode, 2, 1);
5153 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5154 break;
5155 }
5156 default:
5157 do_unallocated:
5158 unallocated_encoding(s);
5159 break;
5160 }
5161 }
5162
5163 /*
5164 * Data processing - register
5165 * 31 30 29 28 25 21 20 16 10 0
5166 * +--+---+--+---+-------+-----+-------+-------+---------+
5167 * | |op0| |op1| 1 0 1 | op2 | | op3 | |
5168 * +--+---+--+---+-------+-----+-------+-------+---------+
5169 */
5170 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5171 {
5172 int op0 = extract32(insn, 30, 1);
5173 int op1 = extract32(insn, 28, 1);
5174 int op2 = extract32(insn, 21, 4);
5175 int op3 = extract32(insn, 10, 6);
5176
5177 if (!op1) {
5178 if (op2 & 8) {
5179 if (op2 & 1) {
5180 /* Add/sub (extended register) */
5181 disas_add_sub_ext_reg(s, insn);
5182 } else {
5183 /* Add/sub (shifted register) */
5184 disas_add_sub_reg(s, insn);
5185 }
5186 } else {
5187 /* Logical (shifted register) */
5188 disas_logic_reg(s, insn);
5189 }
5190 return;
5191 }
5192
5193 switch (op2) {
5194 case 0x0:
5195 switch (op3) {
5196 case 0x00: /* Add/subtract (with carry) */
5197 disas_adc_sbc(s, insn);
5198 break;
5199
5200 case 0x01: /* Rotate right into flags */
5201 case 0x21:
5202 disas_rotate_right_into_flags(s, insn);
5203 break;
5204
5205 case 0x02: /* Evaluate into flags */
5206 case 0x12:
5207 case 0x22:
5208 case 0x32:
5209 disas_evaluate_into_flags(s, insn);
5210 break;
5211
5212 default:
5213 goto do_unallocated;
5214 }
5215 break;
5216
5217 case 0x2: /* Conditional compare */
5218 disas_cc(s, insn); /* both imm and reg forms */
5219 break;
5220
5221 case 0x4: /* Conditional select */
5222 disas_cond_select(s, insn);
5223 break;
5224
5225 case 0x6: /* Data-processing */
5226 if (op0) { /* (1 source) */
5227 disas_data_proc_1src(s, insn);
5228 } else { /* (2 source) */
5229 disas_data_proc_2src(s, insn);
5230 }
5231 break;
5232 case 0x8 ... 0xf: /* (3 source) */
5233 disas_data_proc_3src(s, insn);
5234 break;
5235
5236 default:
5237 do_unallocated:
5238 unallocated_encoding(s);
5239 break;
5240 }
5241 }
5242
5243 static void handle_fp_compare(DisasContext *s, int size,
5244 unsigned int rn, unsigned int rm,
5245 bool cmp_with_zero, bool signal_all_nans)
5246 {
5247 TCGv_i64 tcg_flags = tcg_temp_new_i64();
5248 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5249
5250 if (size == MO_64) {
5251 TCGv_i64 tcg_vn, tcg_vm;
5252
5253 tcg_vn = read_fp_dreg(s, rn);
5254 if (cmp_with_zero) {
5255 tcg_vm = tcg_const_i64(0);
5256 } else {
5257 tcg_vm = read_fp_dreg(s, rm);
5258 }
5259 if (signal_all_nans) {
5260 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5261 } else {
5262 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5263 }
5264 tcg_temp_free_i64(tcg_vn);
5265 tcg_temp_free_i64(tcg_vm);
5266 } else {
5267 TCGv_i32 tcg_vn = tcg_temp_new_i32();
5268 TCGv_i32 tcg_vm = tcg_temp_new_i32();
5269
5270 read_vec_element_i32(s, tcg_vn, rn, 0, size);
5271 if (cmp_with_zero) {
5272 tcg_gen_movi_i32(tcg_vm, 0);
5273 } else {
5274 read_vec_element_i32(s, tcg_vm, rm, 0, size);
5275 }
5276
5277 switch (size) {
5278 case MO_32:
5279 if (signal_all_nans) {
5280 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5281 } else {
5282 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5283 }
5284 break;
5285 case MO_16:
5286 if (signal_all_nans) {
5287 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5288 } else {
5289 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5290 }
5291 break;
5292 default:
5293 g_assert_not_reached();
5294 }
5295
5296 tcg_temp_free_i32(tcg_vn);
5297 tcg_temp_free_i32(tcg_vm);
5298 }
5299
5300 tcg_temp_free_ptr(fpst);
5301
5302 gen_set_nzcv(tcg_flags);
5303
5304 tcg_temp_free_i64(tcg_flags);
5305 }
5306
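/*
 * Sketch of the NZCV value the vfp_cmp*_a64 helpers hand to
 * gen_set_nzcv() above, per the ARM ARM FP comparison rules
 * (hypothetical helper; signalling-NaN handling omitted).
 */
static unsigned sketch_fcmp_nzcv(double a, double b)
{
    if (a != a || b != b) {
        return 0x3;              /* unordered: C and V set */
    } else if (a == b) {
        return 0x6;              /* equal: Z and C set */
    } else if (a < b) {
        return 0x8;              /* less than: N set */
    }
    return 0x2;                  /* greater than: C set */
}
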
5307 /* Floating point compare
5308 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
5309 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5310 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
5311 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5312 */
5313 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5314 {
5315 unsigned int mos, type, rm, op, rn, opc, op2r;
5316 int size;
5317
5318 mos = extract32(insn, 29, 3);
5319 type = extract32(insn, 22, 2);
5320 rm = extract32(insn, 16, 5);
5321 op = extract32(insn, 14, 2);
5322 rn = extract32(insn, 5, 5);
5323 opc = extract32(insn, 3, 2);
5324 op2r = extract32(insn, 0, 3);
5325
5326 if (mos || op || op2r) {
5327 unallocated_encoding(s);
5328 return;
5329 }
5330
5331 switch (type) {
5332 case 0:
5333 size = MO_32;
5334 break;
5335 case 1:
5336 size = MO_64;
5337 break;
5338 case 3:
5339 size = MO_16;
5340 if (dc_isar_feature(aa64_fp16, s)) {
5341 break;
5342 }
5343 /* fallthru */
5344 default:
5345 unallocated_encoding(s);
5346 return;
5347 }
5348
5349 if (!fp_access_check(s)) {
5350 return;
5351 }
5352
5353 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5354 }
5355
5356 /* Floating point conditional compare
5357 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
5358 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5359 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
5360 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5361 */
5362 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5363 {
5364 unsigned int mos, type, rm, cond, rn, op, nzcv;
5365 TCGv_i64 tcg_flags;
5366 TCGLabel *label_continue = NULL;
5367 int size;
5368
5369 mos = extract32(insn, 29, 3);
5370 type = extract32(insn, 22, 2);
5371 rm = extract32(insn, 16, 5);
5372 cond = extract32(insn, 12, 4);
5373 rn = extract32(insn, 5, 5);
5374 op = extract32(insn, 4, 1);
5375 nzcv = extract32(insn, 0, 4);
5376
5377 if (mos) {
5378 unallocated_encoding(s);
5379 return;
5380 }
5381
5382 switch (type) {
5383 case 0:
5384 size = MO_32;
5385 break;
5386 case 1:
5387 size = MO_64;
5388 break;
5389 case 3:
5390 size = MO_16;
5391 if (dc_isar_feature(aa64_fp16, s)) {
5392 break;
5393 }
5394 /* fallthru */
5395 default:
5396 unallocated_encoding(s);
5397 return;
5398 }
5399
5400 if (!fp_access_check(s)) {
5401 return;
5402 }
5403
5404 if (cond < 0x0e) { /* not always */
5405 TCGLabel *label_match = gen_new_label();
5406 label_continue = gen_new_label();
5407 arm_gen_test_cc(cond, label_match);
5408 /* nomatch: */
5409 tcg_flags = tcg_const_i64(nzcv << 28);
5410 gen_set_nzcv(tcg_flags);
5411 tcg_temp_free_i64(tcg_flags);
5412 tcg_gen_br(label_continue);
5413 gen_set_label(label_match);
5414 }
5415
5416 handle_fp_compare(s, size, rn, rm, false, op);
5417
5418 if (cond < 0x0e) {
5419 gen_set_label(label_continue);
5420 }
5421 }
5422
5423 /* Floating point conditional select
5424 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
5425 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5426 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
5427 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5428 */
5429 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5430 {
5431 unsigned int mos, type, rm, cond, rn, rd;
5432 TCGv_i64 t_true, t_false, t_zero;
5433 DisasCompare64 c;
5434 MemOp sz;
5435
5436 mos = extract32(insn, 29, 3);
5437 type = extract32(insn, 22, 2);
5438 rm = extract32(insn, 16, 5);
5439 cond = extract32(insn, 12, 4);
5440 rn = extract32(insn, 5, 5);
5441 rd = extract32(insn, 0, 5);
5442
5443 if (mos) {
5444 unallocated_encoding(s);
5445 return;
5446 }
5447
5448 switch (type) {
5449 case 0:
5450 sz = MO_32;
5451 break;
5452 case 1:
5453 sz = MO_64;
5454 break;
5455 case 3:
5456 sz = MO_16;
5457 if (dc_isar_feature(aa64_fp16, s)) {
5458 break;
5459 }
5460 /* fallthru */
5461 default:
5462 unallocated_encoding(s);
5463 return;
5464 }
5465
5466 if (!fp_access_check(s)) {
5467 return;
5468 }
5469
5470 /* Zero extend sreg & hreg inputs to 64 bits now. */
5471 t_true = tcg_temp_new_i64();
5472 t_false = tcg_temp_new_i64();
5473 read_vec_element(s, t_true, rn, 0, sz);
5474 read_vec_element(s, t_false, rm, 0, sz);
5475
5476 a64_test_cc(&c, cond);
5477 t_zero = tcg_const_i64(0);
5478 tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5479 tcg_temp_free_i64(t_zero);
5480 tcg_temp_free_i64(t_false);
5481 a64_free_cc(&c);
5482
5483 /* Note that sregs & hregs write back zeros to the high bits,
5484 and we've already done the zero-extension. */
5485 write_fp_dreg(s, rd, t_true);
5486 tcg_temp_free_i64(t_true);
5487 }
5488
5489 /* Floating-point data-processing (1 source) - half precision */
5490 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5491 {
5492 TCGv_ptr fpst = NULL;
5493 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5494 TCGv_i32 tcg_res = tcg_temp_new_i32();
5495
5496 switch (opcode) {
5497 case 0x0: /* FMOV */
5498 tcg_gen_mov_i32(tcg_res, tcg_op);
5499 break;
5500 case 0x1: /* FABS */
5501 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5502 break;
5503 case 0x2: /* FNEG */
5504 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5505 break;
5506 case 0x3: /* FSQRT */
5507 fpst = get_fpstatus_ptr(true);
5508 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5509 break;
5510 case 0x8: /* FRINTN */
5511 case 0x9: /* FRINTP */
5512 case 0xa: /* FRINTM */
5513 case 0xb: /* FRINTZ */
5514 case 0xc: /* FRINTA */
5515 {
5516 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5517 fpst = get_fpstatus_ptr(true);
5518
5519 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5520 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5521
5522 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5523 tcg_temp_free_i32(tcg_rmode);
5524 break;
5525 }
5526 case 0xe: /* FRINTX */
5527 fpst = get_fpstatus_ptr(true);
5528 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5529 break;
5530 case 0xf: /* FRINTI */
5531 fpst = get_fpstatus_ptr(true);
5532 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5533 break;
5534 default:
5535 abort();
5536 }
5537
5538 write_fp_sreg(s, rd, tcg_res);
5539
5540 if (fpst) {
5541 tcg_temp_free_ptr(fpst);
5542 }
5543 tcg_temp_free_i32(tcg_op);
5544 tcg_temp_free_i32(tcg_res);
5545 }
5546
5547 /* Floating-point data-processing (1 source) - single precision */
5548 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5549 {
5550 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5551 TCGv_i32 tcg_op, tcg_res;
5552 TCGv_ptr fpst;
5553 int rmode = -1;
5554
5555 tcg_op = read_fp_sreg(s, rn);
5556 tcg_res = tcg_temp_new_i32();
5557
5558 switch (opcode) {
5559 case 0x0: /* FMOV */
5560 tcg_gen_mov_i32(tcg_res, tcg_op);
5561 goto done;
5562 case 0x1: /* FABS */
5563 gen_helper_vfp_abss(tcg_res, tcg_op);
5564 goto done;
5565 case 0x2: /* FNEG */
5566 gen_helper_vfp_negs(tcg_res, tcg_op);
5567 goto done;
5568 case 0x3: /* FSQRT */
5569 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5570 goto done;
5571 case 0x8: /* FRINTN */
5572 case 0x9: /* FRINTP */
5573 case 0xa: /* FRINTM */
5574 case 0xb: /* FRINTZ */
5575 case 0xc: /* FRINTA */
5576 rmode = arm_rmode_to_sf(opcode & 7);
5577 gen_fpst = gen_helper_rints;
5578 break;
5579 case 0xe: /* FRINTX */
5580 gen_fpst = gen_helper_rints_exact;
5581 break;
5582 case 0xf: /* FRINTI */
5583 gen_fpst = gen_helper_rints;
5584 break;
5585 case 0x10: /* FRINT32Z */
5586 rmode = float_round_to_zero;
5587 gen_fpst = gen_helper_frint32_s;
5588 break;
5589 case 0x11: /* FRINT32X */
5590 gen_fpst = gen_helper_frint32_s;
5591 break;
5592 case 0x12: /* FRINT64Z */
5593 rmode = float_round_to_zero;
5594 gen_fpst = gen_helper_frint64_s;
5595 break;
5596 case 0x13: /* FRINT64X */
5597 gen_fpst = gen_helper_frint64_s;
5598 break;
5599 default:
5600 g_assert_not_reached();
5601 }
5602
5603 fpst = get_fpstatus_ptr(false);
5604 if (rmode >= 0) {
5605 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5606 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5607 gen_fpst(tcg_res, tcg_op, fpst);
5608 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5609 tcg_temp_free_i32(tcg_rmode);
5610 } else {
5611 gen_fpst(tcg_res, tcg_op, fpst);
5612 }
5613 tcg_temp_free_ptr(fpst);
5614
5615 done:
5616 write_fp_sreg(s, rd, tcg_res);
5617 tcg_temp_free_i32(tcg_op);
5618 tcg_temp_free_i32(tcg_res);
5619 }
5620
5621 /* Floating-point data-processing (1 source) - double precision */
5622 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5623 {
5624 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5625 TCGv_i64 tcg_op, tcg_res;
5626 TCGv_ptr fpst;
5627 int rmode = -1;
5628
5629 switch (opcode) {
5630 case 0x0: /* FMOV */
5631 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5632 return;
5633 }
5634
5635 tcg_op = read_fp_dreg(s, rn);
5636 tcg_res = tcg_temp_new_i64();
5637
5638 switch (opcode) {
5639 case 0x1: /* FABS */
5640 gen_helper_vfp_absd(tcg_res, tcg_op);
5641 goto done;
5642 case 0x2: /* FNEG */
5643 gen_helper_vfp_negd(tcg_res, tcg_op);
5644 goto done;
5645 case 0x3: /* FSQRT */
5646 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5647 goto done;
5648 case 0x8: /* FRINTN */
5649 case 0x9: /* FRINTP */
5650 case 0xa: /* FRINTM */
5651 case 0xb: /* FRINTZ */
5652 case 0xc: /* FRINTA */
5653 rmode = arm_rmode_to_sf(opcode & 7);
5654 gen_fpst = gen_helper_rintd;
5655 break;
5656 case 0xe: /* FRINTX */
5657 gen_fpst = gen_helper_rintd_exact;
5658 break;
5659 case 0xf: /* FRINTI */
5660 gen_fpst = gen_helper_rintd;
5661 break;
5662 case 0x10: /* FRINT32Z */
5663 rmode = float_round_to_zero;
5664 gen_fpst = gen_helper_frint32_d;
5665 break;
5666 case 0x11: /* FRINT32X */
5667 gen_fpst = gen_helper_frint32_d;
5668 break;
5669 case 0x12: /* FRINT64Z */
5670 rmode = float_round_to_zero;
5671 gen_fpst = gen_helper_frint64_d;
5672 break;
5673 case 0x13: /* FRINT64X */
5674 gen_fpst = gen_helper_frint64_d;
5675 break;
5676 default:
5677 g_assert_not_reached();
5678 }
5679
5680 fpst = get_fpstatus_ptr(false);
5681 if (rmode >= 0) {
5682 TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5683 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5684 gen_fpst(tcg_res, tcg_op, fpst);
5685 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5686 tcg_temp_free_i32(tcg_rmode);
5687 } else {
5688 gen_fpst(tcg_res, tcg_op, fpst);
5689 }
5690 tcg_temp_free_ptr(fpst);
5691
5692 done:
5693 write_fp_dreg(s, rd, tcg_res);
5694 tcg_temp_free_i64(tcg_op);
5695 tcg_temp_free_i64(tcg_res);
5696 }
5697
5698 static void handle_fp_fcvt(DisasContext *s, int opcode,
5699 int rd, int rn, int dtype, int ntype)
5700 {
5701 switch (ntype) {
5702 case 0x0:
5703 {
5704 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5705 if (dtype == 1) {
5706 /* Single to double */
5707 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5708 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5709 write_fp_dreg(s, rd, tcg_rd);
5710 tcg_temp_free_i64(tcg_rd);
5711 } else {
5712 /* Single to half */
5713 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5714 TCGv_i32 ahp = get_ahp_flag();
5715 TCGv_ptr fpst = get_fpstatus_ptr(false);
5716
5717 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5718 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5719 write_fp_sreg(s, rd, tcg_rd);
5720 tcg_temp_free_i32(tcg_rd);
5721 tcg_temp_free_i32(ahp);
5722 tcg_temp_free_ptr(fpst);
5723 }
5724 tcg_temp_free_i32(tcg_rn);
5725 break;
5726 }
5727 case 0x1:
5728 {
5729 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5730 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5731 if (dtype == 0) {
5732 /* Double to single */
5733 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5734 } else {
5735 TCGv_ptr fpst = get_fpstatus_ptr(false);
5736 TCGv_i32 ahp = get_ahp_flag();
5737 /* Double to half */
5738 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5739 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5740 tcg_temp_free_ptr(fpst);
5741 tcg_temp_free_i32(ahp);
5742 }
5743 write_fp_sreg(s, rd, tcg_rd);
5744 tcg_temp_free_i32(tcg_rd);
5745 tcg_temp_free_i64(tcg_rn);
5746 break;
5747 }
5748 case 0x3:
5749 {
5750 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5751 TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5752 TCGv_i32 tcg_ahp = get_ahp_flag();
5753 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5754 if (dtype == 0) {
5755 /* Half to single */
5756 TCGv_i32 tcg_rd = tcg_temp_new_i32();
5757 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5758 write_fp_sreg(s, rd, tcg_rd);
5759 tcg_temp_free_ptr(tcg_fpst);
5760 tcg_temp_free_i32(tcg_ahp);
5761 tcg_temp_free_i32(tcg_rd);
5762 } else {
5763 /* Half to double */
5764 TCGv_i64 tcg_rd = tcg_temp_new_i64();
5765 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5766 write_fp_dreg(s, rd, tcg_rd);
5767 tcg_temp_free_i64(tcg_rd);
5768 }
5769 tcg_temp_free_i32(tcg_rn);
5770 break;
5771 }
5772 default:
5773 abort();
5774 }
5775 }
5776
5777 /* Floating point data-processing (1 source)
5778 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
5779 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5780 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
5781 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5782 */
5783 static void disas_fp_1src(DisasContext *s, uint32_t insn)
5784 {
5785 int mos = extract32(insn, 29, 3);
5786 int type = extract32(insn, 22, 2);
5787 int opcode = extract32(insn, 15, 6);
5788 int rn = extract32(insn, 5, 5);
5789 int rd = extract32(insn, 0, 5);
5790
5791 if (mos) {
5792 unallocated_encoding(s);
5793 return;
5794 }
5795
5796 switch (opcode) {
5797 case 0x4: case 0x5: case 0x7:
5798 {
5799 /* FCVT between half, single and double precision */
5800 int dtype = extract32(opcode, 0, 2);
5801 if (type == 2 || dtype == type) {
5802 unallocated_encoding(s);
5803 return;
5804 }
5805 if (!fp_access_check(s)) {
5806 return;
5807 }
5808
5809 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5810 break;
5811 }
5812
5813 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
5814 if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
5815 unallocated_encoding(s);
5816 return;
5817 }
5818 /* fall through */
5819 case 0x0 ... 0x3:
5820 case 0x8 ... 0xc:
5821 case 0xe ... 0xf:
5822 /* 32-to-32 and 64-to-64 ops */
5823 switch (type) {
5824 case 0:
5825 if (!fp_access_check(s)) {
5826 return;
5827 }
5828 handle_fp_1src_single(s, opcode, rd, rn);
5829 break;
5830 case 1:
5831 if (!fp_access_check(s)) {
5832 return;
5833 }
5834 handle_fp_1src_double(s, opcode, rd, rn);
5835 break;
5836 case 3:
5837 if (!dc_isar_feature(aa64_fp16, s)) {
5838 unallocated_encoding(s);
5839 return;
5840 }
5841
5842 if (!fp_access_check(s)) {
5843 return;
5844 }
5845 handle_fp_1src_half(s, opcode, rd, rn);
5846 break;
5847 default:
5848 unallocated_encoding(s);
5849 }
5850 break;
5851
5852 default:
5853 unallocated_encoding(s);
5854 break;
5855 }
5856 }
5857
5858 /* Floating-point data-processing (2 source) - single precision */
5859 static void handle_fp_2src_single(DisasContext *s, int opcode,
5860 int rd, int rn, int rm)
5861 {
5862 TCGv_i32 tcg_op1;
5863 TCGv_i32 tcg_op2;
5864 TCGv_i32 tcg_res;
5865 TCGv_ptr fpst;
5866
5867 tcg_res = tcg_temp_new_i32();
5868 fpst = get_fpstatus_ptr(false);
5869 tcg_op1 = read_fp_sreg(s, rn);
5870 tcg_op2 = read_fp_sreg(s, rm);
5871
5872 switch (opcode) {
5873 case 0x0: /* FMUL */
5874 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5875 break;
5876 case 0x1: /* FDIV */
5877 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5878 break;
5879 case 0x2: /* FADD */
5880 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5881 break;
5882 case 0x3: /* FSUB */
5883 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5884 break;
5885 case 0x4: /* FMAX */
5886 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5887 break;
5888 case 0x5: /* FMIN */
5889 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5890 break;
5891 case 0x6: /* FMAXNM */
5892 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5893 break;
5894 case 0x7: /* FMINNM */
5895 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5896 break;
5897 case 0x8: /* FNMUL */
5898 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5899 gen_helper_vfp_negs(tcg_res, tcg_res);
5900 break;
5901 }
5902
5903 write_fp_sreg(s, rd, tcg_res);
5904
5905 tcg_temp_free_ptr(fpst);
5906 tcg_temp_free_i32(tcg_op1);
5907 tcg_temp_free_i32(tcg_op2);
5908 tcg_temp_free_i32(tcg_res);
5909 }
5910
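/* Editorial aside: FNMUL above is a multiply followed by a separate
 * negation of the result, which (as we read the ARM ARM) matches
 * FPNeg(FPMul(a, b)) rather than a fused negate-multiply: e.g. FNMUL
 * of 2.0 and 3.0 yields -6.0.
 */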
5911 /* Floating-point data-processing (2 source) - double precision */
5912 static void handle_fp_2src_double(DisasContext *s, int opcode,
5913 int rd, int rn, int rm)
5914 {
5915 TCGv_i64 tcg_op1;
5916 TCGv_i64 tcg_op2;
5917 TCGv_i64 tcg_res;
5918 TCGv_ptr fpst;
5919
5920 tcg_res = tcg_temp_new_i64();
5921 fpst = get_fpstatus_ptr(false);
5922 tcg_op1 = read_fp_dreg(s, rn);
5923 tcg_op2 = read_fp_dreg(s, rm);
5924
5925 switch (opcode) {
5926 case 0x0: /* FMUL */
5927 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5928 break;
5929 case 0x1: /* FDIV */
5930 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5931 break;
5932 case 0x2: /* FADD */
5933 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5934 break;
5935 case 0x3: /* FSUB */
5936 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5937 break;
5938 case 0x4: /* FMAX */
5939 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5940 break;
5941 case 0x5: /* FMIN */
5942 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5943 break;
5944 case 0x6: /* FMAXNM */
5945 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5946 break;
5947 case 0x7: /* FMINNM */
5948 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5949 break;
5950 case 0x8: /* FNMUL */
5951 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5952 gen_helper_vfp_negd(tcg_res, tcg_res);
5953 break;
5954 }
5955
5956 write_fp_dreg(s, rd, tcg_res);
5957
5958 tcg_temp_free_ptr(fpst);
5959 tcg_temp_free_i64(tcg_op1);
5960 tcg_temp_free_i64(tcg_op2);
5961 tcg_temp_free_i64(tcg_res);
5962 }
5963
5964 /* Floating-point data-processing (2 source) - half precision */
5965 static void handle_fp_2src_half(DisasContext *s, int opcode,
5966 int rd, int rn, int rm)
5967 {
5968 TCGv_i32 tcg_op1;
5969 TCGv_i32 tcg_op2;
5970 TCGv_i32 tcg_res;
5971 TCGv_ptr fpst;
5972
5973 tcg_res = tcg_temp_new_i32();
5974 fpst = get_fpstatus_ptr(true);
5975 tcg_op1 = read_fp_hreg(s, rn);
5976 tcg_op2 = read_fp_hreg(s, rm);
5977
5978 switch (opcode) {
5979 case 0x0: /* FMUL */
5980 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5981 break;
5982 case 0x1: /* FDIV */
5983 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
5984 break;
5985 case 0x2: /* FADD */
5986 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
5987 break;
5988 case 0x3: /* FSUB */
5989 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
5990 break;
5991 case 0x4: /* FMAX */
5992 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
5993 break;
5994 case 0x5: /* FMIN */
5995 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
5996 break;
5997 case 0x6: /* FMAXNM */
5998 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5999 break;
6000 case 0x7: /* FMINNM */
6001 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6002 break;
6003 case 0x8: /* FNMUL */
6004 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6005 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6006 break;
6007 default:
6008 g_assert_not_reached();
6009 }
6010
6011 write_fp_sreg(s, rd, tcg_res);
6012
6013 tcg_temp_free_ptr(fpst);
6014 tcg_temp_free_i32(tcg_op1);
6015 tcg_temp_free_i32(tcg_op2);
6016 tcg_temp_free_i32(tcg_res);
6017 }
6018
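/* Editorial aside: the half-precision FNMUL above has no dedicated
 * negate helper, so the sign bit (bit 15 of an IEEE binary16 value)
 * is flipped directly, e.g.:
 *
 *   0x4000 (2.0) ^ 0x8000 == 0xc000 (-2.0)
 */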
6019 /* Floating point data-processing (2 source)
6020 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6021 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6022 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
6023 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6024 */
6025 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6026 {
6027 int mos = extract32(insn, 29, 3);
6028 int type = extract32(insn, 22, 2);
6029 int rd = extract32(insn, 0, 5);
6030 int rn = extract32(insn, 5, 5);
6031 int rm = extract32(insn, 16, 5);
6032 int opcode = extract32(insn, 12, 4);
6033
6034 if (opcode > 8 || mos) {
6035 unallocated_encoding(s);
6036 return;
6037 }
6038
6039 switch (type) {
6040 case 0:
6041 if (!fp_access_check(s)) {
6042 return;
6043 }
6044 handle_fp_2src_single(s, opcode, rd, rn, rm);
6045 break;
6046 case 1:
6047 if (!fp_access_check(s)) {
6048 return;
6049 }
6050 handle_fp_2src_double(s, opcode, rd, rn, rm);
6051 break;
6052 case 3:
6053 if (!dc_isar_feature(aa64_fp16, s)) {
6054 unallocated_encoding(s);
6055 return;
6056 }
6057 if (!fp_access_check(s)) {
6058 return;
6059 }
6060 handle_fp_2src_half(s, opcode, rd, rn, rm);
6061 break;
6062 default:
6063 unallocated_encoding(s);
6064 }
6065 }
6066
6067 /* Floating-point data-processing (3 source) - single precision */
6068 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6069 int rd, int rn, int rm, int ra)
6070 {
6071 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6072 TCGv_i32 tcg_res = tcg_temp_new_i32();
6073 TCGv_ptr fpst = get_fpstatus_ptr(false);
6074
6075 tcg_op1 = read_fp_sreg(s, rn);
6076 tcg_op2 = read_fp_sreg(s, rm);
6077 tcg_op3 = read_fp_sreg(s, ra);
6078
6079 /* These are fused multiply-add, and must be done as one
6080 * floating point operation with no rounding between the
6081 * multiplication and addition steps.
6082 * NB that doing the negations here as separate steps is
6083 * correct: an input NaN should come out with its sign bit
6084 * flipped if it is a negated input.
6085 */
6086 if (o1) {
6087 gen_helper_vfp_negs(tcg_op3, tcg_op3);
6088 }
6089
6090 if (o0 != o1) {
6091 gen_helper_vfp_negs(tcg_op1, tcg_op1);
6092 }
6093
6094 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6095
6096 write_fp_sreg(s, rd, tcg_res);
6097
6098 tcg_temp_free_ptr(fpst);
6099 tcg_temp_free_i32(tcg_op1);
6100 tcg_temp_free_i32(tcg_op2);
6101 tcg_temp_free_i32(tcg_op3);
6102 tcg_temp_free_i32(tcg_res);
6103 }
6104
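/* Editorial sketch of the o0/o1 negation scheme used above (our reading
 * of the A64 encodings, offered as an illustration):
 *
 *   o1 o0  insn     computes
 *   0  0   FMADD     Ra + Rn * Rm
 *   0  1   FMSUB     Ra - Rn * Rm   (op1 negated)
 *   1  0   FNMADD   -Ra - Rn * Rm   (op1 and op3 negated)
 *   1  1   FNMSUB   -Ra + Rn * Rm   (op3 negated)
 */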
6105 /* Floating-point data-processing (3 source) - double precision */
6106 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6107 int rd, int rn, int rm, int ra)
6108 {
6109 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6110 TCGv_i64 tcg_res = tcg_temp_new_i64();
6111 TCGv_ptr fpst = get_fpstatus_ptr(false);
6112
6113 tcg_op1 = read_fp_dreg(s, rn);
6114 tcg_op2 = read_fp_dreg(s, rm);
6115 tcg_op3 = read_fp_dreg(s, ra);
6116
6117 /* These are fused multiply-add, and must be done as one
6118 * floating point operation with no rounding between the
6119 * multiplication and addition steps.
6120 * NB that doing the negations here as separate steps is
6121 * correct: an input NaN should come out with its sign bit
6122 * flipped if it is a negated input.
6123 */
6124 if (o1) {
6125 gen_helper_vfp_negd(tcg_op3, tcg_op3);
6126 }
6127
6128 if (o0 != o1) {
6129 gen_helper_vfp_negd(tcg_op1, tcg_op1);
6130 }
6131
6132 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6133
6134 write_fp_dreg(s, rd, tcg_res);
6135
6136 tcg_temp_free_ptr(fpst);
6137 tcg_temp_free_i64(tcg_op1);
6138 tcg_temp_free_i64(tcg_op2);
6139 tcg_temp_free_i64(tcg_op3);
6140 tcg_temp_free_i64(tcg_res);
6141 }
6142
6143 /* Floating-point data-processing (3 source) - half precision */
6144 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6145 int rd, int rn, int rm, int ra)
6146 {
6147 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6148 TCGv_i32 tcg_res = tcg_temp_new_i32();
6149 TCGv_ptr fpst = get_fpstatus_ptr(true);
6150
6151 tcg_op1 = read_fp_hreg(s, rn);
6152 tcg_op2 = read_fp_hreg(s, rm);
6153 tcg_op3 = read_fp_hreg(s, ra);
6154
6155 /* These are fused multiply-add, and must be done as one
6156 * floating point operation with no rounding between the
6157 * multiplication and addition steps.
6158 * NB that doing the negations here as separate steps is
6159 * correct: an input NaN should come out with its sign bit
6160 * flipped if it is a negated input.
6161 */
6162 if (o1) {
6163 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6164 }
6165
6166 if (o0 != o1) {
6167 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6168 }
6169
6170 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6171
6172 write_fp_sreg(s, rd, tcg_res);
6173
6174 tcg_temp_free_ptr(fpst);
6175 tcg_temp_free_i32(tcg_op1);
6176 tcg_temp_free_i32(tcg_op2);
6177 tcg_temp_free_i32(tcg_op3);
6178 tcg_temp_free_i32(tcg_res);
6179 }
6180
6181 /* Floating point data-processing (3 source)
6182 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
6183 * +---+---+---+-----------+------+----+------+----+------+------+------+
6184 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
6185 * +---+---+---+-----------+------+----+------+----+------+------+------+
6186 */
6187 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6188 {
6189 int mos = extract32(insn, 29, 3);
6190 int type = extract32(insn, 22, 2);
6191 int rd = extract32(insn, 0, 5);
6192 int rn = extract32(insn, 5, 5);
6193 int ra = extract32(insn, 10, 5);
6194 int rm = extract32(insn, 16, 5);
6195 bool o0 = extract32(insn, 15, 1);
6196 bool o1 = extract32(insn, 21, 1);
6197
6198 if (mos) {
6199 unallocated_encoding(s);
6200 return;
6201 }
6202
6203 switch (type) {
6204 case 0:
6205 if (!fp_access_check(s)) {
6206 return;
6207 }
6208 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6209 break;
6210 case 1:
6211 if (!fp_access_check(s)) {
6212 return;
6213 }
6214 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6215 break;
6216 case 3:
6217 if (!dc_isar_feature(aa64_fp16, s)) {
6218 unallocated_encoding(s);
6219 return;
6220 }
6221 if (!fp_access_check(s)) {
6222 return;
6223 }
6224 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6225 break;
6226 default:
6227 unallocated_encoding(s);
6228 }
6229 }
6230
6231 /* Floating point immediate
6232 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
6233 * +---+---+---+-----------+------+---+------------+-------+------+------+
6234 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
6235 * +---+---+---+-----------+------+---+------------+-------+------+------+
6236 */
6237 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6238 {
6239 int rd = extract32(insn, 0, 5);
6240 int imm5 = extract32(insn, 5, 5);
6241 int imm8 = extract32(insn, 13, 8);
6242 int type = extract32(insn, 22, 2);
6243 int mos = extract32(insn, 29, 3);
6244 uint64_t imm;
6245 TCGv_i64 tcg_res;
6246 MemOp sz;
6247
6248 if (mos || imm5) {
6249 unallocated_encoding(s);
6250 return;
6251 }
6252
6253 switch (type) {
6254 case 0:
6255 sz = MO_32;
6256 break;
6257 case 1:
6258 sz = MO_64;
6259 break;
6260 case 3:
6261 sz = MO_16;
6262 if (dc_isar_feature(aa64_fp16, s)) {
6263 break;
6264 }
6265 /* fallthru */
6266 default:
6267 unallocated_encoding(s);
6268 return;
6269 }
6270
6271 if (!fp_access_check(s)) {
6272 return;
6273 }
6274
6275 imm = vfp_expand_imm(sz, imm8);
6276
6277 tcg_res = tcg_const_i64(imm);
6278 write_fp_dreg(s, rd, tcg_res);
6279 tcg_temp_free_i64(tcg_res);
6280 }
6281
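/* Worked example (editorial, hedged): vfp_expand_imm() implements the
 * ARM ARM's VFPExpandImm(). For sz == MO_32 and imm8 == 0x70
 * (a=0, b=1, cd=0b11, efgh=0) the expansion is
 *   sign = 0, exp = NOT(b):Replicate(b,5):cd = 0x7f, frac = 0
 * i.e. 0x3f800000 == 1.0f, which is how FMOV Sd, #1.0 is encoded.
 */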
6282 /* Handle floating point <=> fixed point conversions. Note that we can
6283 * also deal with fp <=> integer conversions as a special case (scale == 64).
6284 * OPTME: consider handling that special case specially or at least skipping
6285 * the call to scalbn in the helpers for zero shifts.
6286 */
6287 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6288 bool itof, int rmode, int scale, int sf, int type)
6289 {
6290 bool is_signed = !(opcode & 1);
6291 TCGv_ptr tcg_fpstatus;
6292 TCGv_i32 tcg_shift, tcg_single;
6293 TCGv_i64 tcg_double;
6294
6295 tcg_fpstatus = get_fpstatus_ptr(type == 3);
6296
6297 tcg_shift = tcg_const_i32(64 - scale);
6298
6299 if (itof) {
6300 TCGv_i64 tcg_int = cpu_reg(s, rn);
6301 if (!sf) {
6302 TCGv_i64 tcg_extend = new_tmp_a64(s);
6303
6304 if (is_signed) {
6305 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6306 } else {
6307 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6308 }
6309
6310 tcg_int = tcg_extend;
6311 }
6312
6313 switch (type) {
6314 case 1: /* float64 */
6315 tcg_double = tcg_temp_new_i64();
6316 if (is_signed) {
6317 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6318 tcg_shift, tcg_fpstatus);
6319 } else {
6320 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6321 tcg_shift, tcg_fpstatus);
6322 }
6323 write_fp_dreg(s, rd, tcg_double);
6324 tcg_temp_free_i64(tcg_double);
6325 break;
6326
6327 case 0: /* float32 */
6328 tcg_single = tcg_temp_new_i32();
6329 if (is_signed) {
6330 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6331 tcg_shift, tcg_fpstatus);
6332 } else {
6333 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6334 tcg_shift, tcg_fpstatus);
6335 }
6336 write_fp_sreg(s, rd, tcg_single);
6337 tcg_temp_free_i32(tcg_single);
6338 break;
6339
6340 case 3: /* float16 */
6341 tcg_single = tcg_temp_new_i32();
6342 if (is_signed) {
6343 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6344 tcg_shift, tcg_fpstatus);
6345 } else {
6346 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6347 tcg_shift, tcg_fpstatus);
6348 }
6349 write_fp_sreg(s, rd, tcg_single);
6350 tcg_temp_free_i32(tcg_single);
6351 break;
6352
6353 default:
6354 g_assert_not_reached();
6355 }
6356 } else {
6357 TCGv_i64 tcg_int = cpu_reg(s, rd);
6358 TCGv_i32 tcg_rmode;
6359
6360 if (extract32(opcode, 2, 1)) {
6361 /* There are too many rounding modes to all fit into rmode,
6362 * so FCVTA[US] is a special case.
6363 */
6364 rmode = FPROUNDING_TIEAWAY;
6365 }
6366
6367 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6368
6369 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6370
6371 switch (type) {
6372 case 1: /* float64 */
6373 tcg_double = read_fp_dreg(s, rn);
6374 if (is_signed) {
6375 if (!sf) {
6376 gen_helper_vfp_tosld(tcg_int, tcg_double,
6377 tcg_shift, tcg_fpstatus);
6378 } else {
6379 gen_helper_vfp_tosqd(tcg_int, tcg_double,
6380 tcg_shift, tcg_fpstatus);
6381 }
6382 } else {
6383 if (!sf) {
6384 gen_helper_vfp_tould(tcg_int, tcg_double,
6385 tcg_shift, tcg_fpstatus);
6386 } else {
6387 gen_helper_vfp_touqd(tcg_int, tcg_double,
6388 tcg_shift, tcg_fpstatus);
6389 }
6390 }
6391 if (!sf) {
6392 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6393 }
6394 tcg_temp_free_i64(tcg_double);
6395 break;
6396
6397 case 0: /* float32 */
6398 tcg_single = read_fp_sreg(s, rn);
6399 if (sf) {
6400 if (is_signed) {
6401 gen_helper_vfp_tosqs(tcg_int, tcg_single,
6402 tcg_shift, tcg_fpstatus);
6403 } else {
6404 gen_helper_vfp_touqs(tcg_int, tcg_single,
6405 tcg_shift, tcg_fpstatus);
6406 }
6407 } else {
6408 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6409 if (is_signed) {
6410 gen_helper_vfp_tosls(tcg_dest, tcg_single,
6411 tcg_shift, tcg_fpstatus);
6412 } else {
6413 gen_helper_vfp_touls(tcg_dest, tcg_single,
6414 tcg_shift, tcg_fpstatus);
6415 }
6416 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6417 tcg_temp_free_i32(tcg_dest);
6418 }
6419 tcg_temp_free_i32(tcg_single);
6420 break;
6421
6422 case 3: /* float16 */
6423 tcg_single = read_fp_sreg(s, rn);
6424 if (sf) {
6425 if (is_signed) {
6426 gen_helper_vfp_tosqh(tcg_int, tcg_single,
6427 tcg_shift, tcg_fpstatus);
6428 } else {
6429 gen_helper_vfp_touqh(tcg_int, tcg_single,
6430 tcg_shift, tcg_fpstatus);
6431 }
6432 } else {
6433 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6434 if (is_signed) {
6435 gen_helper_vfp_toslh(tcg_dest, tcg_single,
6436 tcg_shift, tcg_fpstatus);
6437 } else {
6438 gen_helper_vfp_toulh(tcg_dest, tcg_single,
6439 tcg_shift, tcg_fpstatus);
6440 }
6441 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6442 tcg_temp_free_i32(tcg_dest);
6443 }
6444 tcg_temp_free_i32(tcg_single);
6445 break;
6446
6447 default:
6448 g_assert_not_reached();
6449 }
6450
6451 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6452 tcg_temp_free_i32(tcg_rmode);
6453 }
6454
6455 tcg_temp_free_ptr(tcg_fpstatus);
6456 tcg_temp_free_i32(tcg_shift);
6457 }
6458
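/* Editorial note: 'scale' arrives as the raw field and tcg_shift is
 * 64 - scale, i.e. the number of fraction bits, so a float->fixed
 * conversion with 8 fraction bits effectively computes
 * value * 2^8 (1.5 becomes 384). scale == 64 gives a shift of 0,
 * which is the plain fp <=> integer case mentioned above.
 */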
6459 /* Floating point <-> fixed point conversions
6460 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6461 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6462 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
6463 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6464 */
6465 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6466 {
6467 int rd = extract32(insn, 0, 5);
6468 int rn = extract32(insn, 5, 5);
6469 int scale = extract32(insn, 10, 6);
6470 int opcode = extract32(insn, 16, 3);
6471 int rmode = extract32(insn, 19, 2);
6472 int type = extract32(insn, 22, 2);
6473 bool sbit = extract32(insn, 29, 1);
6474 bool sf = extract32(insn, 31, 1);
6475 bool itof;
6476
6477 if (sbit || (!sf && scale < 32)) {
6478 unallocated_encoding(s);
6479 return;
6480 }
6481
6482 switch (type) {
6483 case 0: /* float32 */
6484 case 1: /* float64 */
6485 break;
6486 case 3: /* float16 */
6487 if (dc_isar_feature(aa64_fp16, s)) {
6488 break;
6489 }
6490 /* fallthru */
6491 default:
6492 unallocated_encoding(s);
6493 return;
6494 }
6495
6496 switch ((rmode << 3) | opcode) {
6497 case 0x2: /* SCVTF */
6498 case 0x3: /* UCVTF */
6499 itof = true;
6500 break;
6501 case 0x18: /* FCVTZS */
6502 case 0x19: /* FCVTZU */
6503 itof = false;
6504 break;
6505 default:
6506 unallocated_encoding(s);
6507 return;
6508 }
6509
6510 if (!fp_access_check(s)) {
6511 return;
6512 }
6513
6514 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6515 }
6516
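/* Editorial example: the 'scale < 32' restriction on the 32-bit forms
 * follows from #fbits being encoded as scale = 64 - fbits with
 * 1 <= fbits <= 32; e.g. FCVTZS Wd, Sn, #16 has scale == 48, while any
 * scale below 32 would imply more fraction bits than the register has
 * and is treated as unallocated.
 */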
6517 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6518 {
6519 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6520 * without conversion.
6521 */
6522
6523 if (itof) {
6524 TCGv_i64 tcg_rn = cpu_reg(s, rn);
6525 TCGv_i64 tmp;
6526
6527 switch (type) {
6528 case 0:
6529 /* 32 bit */
6530 tmp = tcg_temp_new_i64();
6531 tcg_gen_ext32u_i64(tmp, tcg_rn);
6532 write_fp_dreg(s, rd, tmp);
6533 tcg_temp_free_i64(tmp);
6534 break;
6535 case 1:
6536 /* 64 bit */
6537 write_fp_dreg(s, rd, tcg_rn);
6538 break;
6539 case 2:
6540 /* 64 bit to top half. */
6541 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6542 clear_vec_high(s, true, rd);
6543 break;
6544 case 3:
6545 /* 16 bit */
6546 tmp = tcg_temp_new_i64();
6547 tcg_gen_ext16u_i64(tmp, tcg_rn);
6548 write_fp_dreg(s, rd, tmp);
6549 tcg_temp_free_i64(tmp);
6550 break;
6551 default:
6552 g_assert_not_reached();
6553 }
6554 } else {
6555 TCGv_i64 tcg_rd = cpu_reg(s, rd);
6556
6557 switch (type) {
6558 case 0:
6559 /* 32 bit */
6560 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6561 break;
6562 case 1:
6563 /* 64 bit */
6564 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6565 break;
6566 case 2:
6567 /* 64 bits from top half */
6568 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6569 break;
6570 case 3:
6571 /* 16 bit */
6572 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6573 break;
6574 default:
6575 g_assert_not_reached();
6576 }
6577 }
6578 }
6579
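/* Editorial example: type == 2 above is the FMOV Vd.D[1], Xn /
 * FMOV Xd, Vn.D[1] pair, moving 64 bits to or from the top half of a
 * 128-bit register; the store to the high doubleword leaves the low
 * doubleword intact, with clear_vec_high() zeroing any bits beyond
 * the 128-bit register.
 */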
6580 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6581 {
6582 TCGv_i64 t = read_fp_dreg(s, rn);
6583 TCGv_ptr fpstatus = get_fpstatus_ptr(false);
6584
6585 gen_helper_fjcvtzs(t, t, fpstatus);
6586
6587 tcg_temp_free_ptr(fpstatus);
6588
6589 tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6590 tcg_gen_extrh_i64_i32(cpu_ZF, t);
6591 tcg_gen_movi_i32(cpu_CF, 0);
6592 tcg_gen_movi_i32(cpu_NF, 0);
6593 tcg_gen_movi_i32(cpu_VF, 0);
6594
6595 tcg_temp_free_i64(t);
6596 }
6597
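/* Editorial note on the flag handling above: FJCVTZS sets NZCV to 0Z00,
 * with Z indicating an exact in-range conversion. The helper (as we
 * read it) returns the 32-bit result in the low half of t and the Z
 * information in the high half; given QEMU's convention that Z is set
 * when cpu_ZF == 0, the extrh copies that straight in, and N, C and V
 * are simply cleared.
 */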
6598 /* Floating point <-> integer conversions
6599 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6600 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6601 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6602 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6603 */
6604 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6605 {
6606 int rd = extract32(insn, 0, 5);
6607 int rn = extract32(insn, 5, 5);
6608 int opcode = extract32(insn, 16, 3);
6609 int rmode = extract32(insn, 19, 2);
6610 int type = extract32(insn, 22, 2);
6611 bool sbit = extract32(insn, 29, 1);
6612 bool sf = extract32(insn, 31, 1);
6613 bool itof = false;
6614
6615 if (sbit) {
6616 goto do_unallocated;
6617 }
6618
6619 switch (opcode) {
6620 case 2: /* SCVTF */
6621 case 3: /* UCVTF */
6622 itof = true;
6623 /* fallthru */
6624 case 4: /* FCVTAS */
6625 case 5: /* FCVTAU */
6626 if (rmode != 0) {
6627 goto do_unallocated;
6628 }
6629 /* fallthru */
6630 case 0: /* FCVT[NPMZ]S */
6631 case 1: /* FCVT[NPMZ]U */
6632 switch (type) {
6633 case 0: /* float32 */
6634 case 1: /* float64 */
6635 break;
6636 case 3: /* float16 */
6637 if (!dc_isar_feature(aa64_fp16, s)) {
6638 goto do_unallocated;
6639 }
6640 break;
6641 default:
6642 goto do_unallocated;
6643 }
6644 if (!fp_access_check(s)) {
6645 return;
6646 }
6647 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6648 break;
6649
6650 default:
6651 switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6652 case 0b01100110: /* FMOV half <-> 32-bit int */
6653 case 0b01100111:
6654 case 0b11100110: /* FMOV half <-> 64-bit int */
6655 case 0b11100111:
6656 if (!dc_isar_feature(aa64_fp16, s)) {
6657 goto do_unallocated;
6658 }
6659 /* fallthru */
6660 case 0b00000110: /* FMOV 32-bit */
6661 case 0b00000111:
6662 case 0b10100110: /* FMOV 64-bit */
6663 case 0b10100111:
6664 case 0b11001110: /* FMOV top half of 128-bit */
6665 case 0b11001111:
6666 if (!fp_access_check(s)) {
6667 return;
6668 }
6669 itof = opcode & 1;
6670 handle_fmov(s, rd, rn, type, itof);
6671 break;
6672
6673 case 0b00111110: /* FJCVTZS */
6674 if (!dc_isar_feature(aa64_jscvt, s)) {
6675 goto do_unallocated;
6676 } else if (fp_access_check(s)) {
6677 handle_fjcvtzs(s, rd, rn);
6678 }
6679 break;
6680
6681 default:
6682 do_unallocated:
6683 unallocated_encoding(s);
6684 return;
6685 }
6686 break;
6687 }
6688 }
6689
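/* Editorial worked example for the sf:type:rmode:opcode key above:
 * FMOV Xd, Dn has sf = 1, type = 1, rmode = 0, opcode = 6, giving
 * 1:01:00:110 == 0b10100110, the "FMOV 64-bit" case; opcode bit 0 then
 * selects the direction (0 = fp to gpr, 1 = gpr to fp).
 */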
6690 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6691 * 31 30 29 28 25 24 0
6692 * +---+---+---+---------+-----------------------------+
6693 * | | 0 | | 1 1 1 1 | |
6694 * +---+---+---+---------+-----------------------------+
6695 */
6696 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6697 {
6698 if (extract32(insn, 24, 1)) {
6699 /* Floating point data-processing (3 source) */
6700 disas_fp_3src(s, insn);
6701 } else if (extract32(insn, 21, 1) == 0) {
6702 /* Floating point to fixed point conversions */
6703 disas_fp_fixed_conv(s, insn);
6704 } else {
6705 switch (extract32(insn, 10, 2)) {
6706 case 1:
6707 /* Floating point conditional compare */
6708 disas_fp_ccomp(s, insn);
6709 break;
6710 case 2:
6711 /* Floating point data-processing (2 source) */
6712 disas_fp_2src(s, insn);
6713 break;
6714 case 3:
6715 /* Floating point conditional select */
6716 disas_fp_csel(s, insn);
6717 break;
6718 case 0:
6719 switch (ctz32(extract32(insn, 12, 4))) {
6720 case 0: /* [15:12] == xxx1 */
6721 /* Floating point immediate */
6722 disas_fp_imm(s, insn);
6723 break;
6724 case 1: /* [15:12] == xx10 */
6725 /* Floating point compare */
6726 disas_fp_compare(s, insn);
6727 break;
6728 case 2: /* [15:12] == x100 */
6729 /* Floating point data-processing (1 source) */
6730 disas_fp_1src(s, insn);
6731 break;
6732 case 3: /* [15:12] == 1000 */
6733 unallocated_encoding(s);
6734 break;
6735 default: /* [15:12] == 0000 */
6736 /* Floating point <-> integer conversions */
6737 disas_fp_int_conv(s, insn);
6738 break;
6739 }
6740 break;
6741 }
6742 }
6743 }
6744
6745 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6746 int pos)
6747 {
6748 /* Extract 64 bits from the middle of two concatenated 64 bit
6749 * vector register slices left:right. The extracted bits start
6750 * at 'pos' bits into the right (least significant) side.
6751 * We return the result in tcg_right, and guarantee not to
6752 * trash tcg_left.
6753 */
6754 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6755 assert(pos > 0 && pos < 64);
6756
6757 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6758 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6759 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6760
6761 tcg_temp_free_i64(tcg_tmp);
6762 }
6763
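/* Editorial worked example: with pos == 24,
 *   result = (right >> 24) | (left << 40)
 * i.e. the low 40 bits come from right<63:24> and the high 24 bits
 * from left<23:0>, which is exactly a 64-bit extract at bit 24 of the
 * 128-bit value left:right.
 */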
6764 /* EXT
6765 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
6766 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6767 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
6768 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6769 */
6770 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6771 {
6772 int is_q = extract32(insn, 30, 1);
6773 int op2 = extract32(insn, 22, 2);
6774 int imm4 = extract32(insn, 11, 4);
6775 int rm = extract32(insn, 16, 5);
6776 int rn = extract32(insn, 5, 5);
6777 int rd = extract32(insn, 0, 5);
6778 int pos = imm4 << 3;
6779 TCGv_i64 tcg_resl, tcg_resh;
6780
6781 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6782 unallocated_encoding(s);
6783 return;
6784 }
6785
6786 if (!fp_access_check(s)) {
6787 return;
6788 }
6789
6790 tcg_resh = tcg_temp_new_i64();
6791 tcg_resl = tcg_temp_new_i64();
6792
6793 /* Vd gets bits starting at pos bits into Vm:Vn. This is
6794 * either extracting 128 bits from a 128:128 concatenation, or
6795 * extracting 64 bits from a 64:64 concatenation.
6796 */
6797 if (!is_q) {
6798 read_vec_element(s, tcg_resl, rn, 0, MO_64);
6799 if (pos != 0) {
6800 read_vec_element(s, tcg_resh, rm, 0, MO_64);
6801 do_ext64(s, tcg_resh, tcg_resl, pos);
6802 }
6803 tcg_gen_movi_i64(tcg_resh, 0);
6804 } else {
6805 TCGv_i64 tcg_hh;
6806 typedef struct {
6807 int reg;
6808 int elt;
6809 } EltPosns;
6810 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6811 EltPosns *elt = eltposns;
6812
6813 if (pos >= 64) {
6814 elt++;
6815 pos -= 64;
6816 }
6817
6818 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6819 elt++;
6820 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6821 elt++;
6822 if (pos != 0) {
6823 do_ext64(s, tcg_resh, tcg_resl, pos);
6824 tcg_hh = tcg_temp_new_i64();
6825 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6826 do_ext64(s, tcg_hh, tcg_resh, pos);
6827 tcg_temp_free_i64(tcg_hh);
6828 }
6829 }
6830
6831 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6832 tcg_temp_free_i64(tcg_resl);
6833 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6834 tcg_temp_free_i64(tcg_resh);
6835 }
6836
6837 /* TBL/TBX
6838 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
6839 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6840 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
6841 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6842 */
6843 static void disas_simd_tb(DisasContext *s, uint32_t insn)
6844 {
6845 int op2 = extract32(insn, 22, 2);
6846 int is_q = extract32(insn, 30, 1);
6847 int rm = extract32(insn, 16, 5);
6848 int rn = extract32(insn, 5, 5);
6849 int rd = extract32(insn, 0, 5);
6850 int is_tblx = extract32(insn, 12, 1);
6851 int len = extract32(insn, 13, 2);
6852 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6853 TCGv_i32 tcg_regno, tcg_numregs;
6854
6855 if (op2 != 0) {
6856 unallocated_encoding(s);
6857 return;
6858 }
6859
6860 if (!fp_access_check(s)) {
6861 return;
6862 }
6863
6864 /* This does a table lookup: for every byte element in the input
6865 * we index into a table formed from up to four vector registers,
6866 * and then the output is the result of the lookups. Our helper
6867 * function does the lookup operation for a single 64 bit part of
6868 * the input.
6869 */
6870 tcg_resl = tcg_temp_new_i64();
6871 tcg_resh = tcg_temp_new_i64();
6872
6873 if (is_tblx) {
6874 read_vec_element(s, tcg_resl, rd, 0, MO_64);
6875 } else {
6876 tcg_gen_movi_i64(tcg_resl, 0);
6877 }
6878 if (is_tblx && is_q) {
6879 read_vec_element(s, tcg_resh, rd, 1, MO_64);
6880 } else {
6881 tcg_gen_movi_i64(tcg_resh, 0);
6882 }
6883
6884 tcg_idx = tcg_temp_new_i64();
6885 tcg_regno = tcg_const_i32(rn);
6886 tcg_numregs = tcg_const_i32(len + 1);
6887 read_vec_element(s, tcg_idx, rm, 0, MO_64);
6888 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6889 tcg_regno, tcg_numregs);
6890 if (is_q) {
6891 read_vec_element(s, tcg_idx, rm, 1, MO_64);
6892 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6893 tcg_regno, tcg_numregs);
6894 }
6895 tcg_temp_free_i64(tcg_idx);
6896 tcg_temp_free_i32(tcg_regno);
6897 tcg_temp_free_i32(tcg_numregs);
6898
6899 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6900 tcg_temp_free_i64(tcg_resl);
6901 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6902 tcg_temp_free_i64(tcg_resh);
6903 }
6904
6905 /* ZIP/UZP/TRN
6906 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
6907 * +---+---+-------------+------+---+------+---+------------------+------+
6908 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
6909 * +---+---+-------------+------+---+------+---+------------------+------+
6910 */
6911 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6912 {
6913 int rd = extract32(insn, 0, 5);
6914 int rn = extract32(insn, 5, 5);
6915 int rm = extract32(insn, 16, 5);
6916 int size = extract32(insn, 22, 2);
6917 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6918 * bit 2 indicates 1 vs 2 variant of the insn.
6919 */
6920 int opcode = extract32(insn, 12, 2);
6921 bool part = extract32(insn, 14, 1);
6922 bool is_q = extract32(insn, 30, 1);
6923 int esize = 8 << size;
6924 int i, ofs;
6925 int datasize = is_q ? 128 : 64;
6926 int elements = datasize / esize;
6927 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6928
6929 if (opcode == 0 || (size == 3 && !is_q)) {
6930 unallocated_encoding(s);
6931 return;
6932 }
6933
6934 if (!fp_access_check(s)) {
6935 return;
6936 }
6937
6938 tcg_resl = tcg_const_i64(0);
6939 tcg_resh = tcg_const_i64(0);
6940 tcg_res = tcg_temp_new_i64();
6941
6942 for (i = 0; i < elements; i++) {
6943 switch (opcode) {
6944 case 1: /* UZP1/2 */
6945 {
6946 int midpoint = elements / 2;
6947 if (i < midpoint) {
6948 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6949 } else {
6950 read_vec_element(s, tcg_res, rm,
6951 2 * (i - midpoint) + part, size);
6952 }
6953 break;
6954 }
6955 case 2: /* TRN1/2 */
6956 if (i & 1) {
6957 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6958 } else {
6959 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6960 }
6961 break;
6962 case 3: /* ZIP1/2 */
6963 {
6964 int base = part * elements / 2;
6965 if (i & 1) {
6966 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6967 } else {
6968 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6969 }
6970 break;
6971 }
6972 default:
6973 g_assert_not_reached();
6974 }
6975
6976 ofs = i * esize;
6977 if (ofs < 64) {
6978 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
6979 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
6980 } else {
6981 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
6982 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
6983 }
6984 }
6985
6986 tcg_temp_free_i64(tcg_res);
6987
6988 write_vec_element(s, tcg_resl, rd, 0, MO_64);
6989 tcg_temp_free_i64(tcg_resl);
6990 write_vec_element(s, tcg_resh, rd, 1, MO_64);
6991 tcg_temp_free_i64(tcg_resh);
6992 }
6993
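/* Editorial worked example of the index arithmetic above, for
 * ZIP1 Vd.8B, Vn.8B, Vm.8B (opcode == 3, part == 0, elements == 8):
 * base == 0, so the result bytes are
 *   n[0], m[0], n[1], m[1], n[2], m[2], n[3], m[3]
 * ZIP2 (part == 1) starts from base == 4 and interleaves the high
 * halves instead.
 */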
6994 /*
6995 * do_reduction_op helper
6996 *
6997 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6998 * important for correct NaN propagation that we do these
6999 * operations in exactly the order specified by the pseudocode.
7000 *
7001 * This is a recursive function; TCG temps should be freed by the
7002 * calling function once it is done with the values.
7003 */
7004 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7005 int esize, int size, int vmap, TCGv_ptr fpst)
7006 {
7007 if (esize == size) {
7008 int element;
7009 MemOp msize = esize == 16 ? MO_16 : MO_32;
7010 TCGv_i32 tcg_elem;
7011
7012 /* We should have one element left here */
7013 assert(ctpop8(vmap) == 1);
7014 element = ctz32(vmap);
7015 assert(element < 8);
7016
7017 tcg_elem = tcg_temp_new_i32();
7018 read_vec_element_i32(s, tcg_elem, rn, element, msize);
7019 return tcg_elem;
7020 } else {
7021 int bits = size / 2;
7022 int shift = ctpop8(vmap) / 2;
7023 int vmap_lo = (vmap >> shift) & vmap;
7024 int vmap_hi = (vmap & ~vmap_lo);
7025 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7026
7027 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7028 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7029 tcg_res = tcg_temp_new_i32();
7030
7031 switch (fpopcode) {
7032 case 0x0c: /* fmaxnmv half-precision */
7033 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7034 break;
7035 case 0x0f: /* fmaxv half-precision */
7036 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7037 break;
7038 case 0x1c: /* fminnmv half-precision */
7039 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7040 break;
7041 case 0x1f: /* fminv half-precision */
7042 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7043 break;
7044 case 0x2c: /* fmaxnmv */
7045 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7046 break;
7047 case 0x2f: /* fmaxv */
7048 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7049 break;
7050 case 0x3c: /* fminnmv */
7051 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7052 break;
7053 case 0x3f: /* fminv */
7054 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7055 break;
7056 default:
7057 g_assert_not_reached();
7058 }
7059
7060 tcg_temp_free_i32(tcg_hi);
7061 tcg_temp_free_i32(tcg_lo);
7062 return tcg_res;
7063 }
7064 }
7065
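/* Editorial worked example: for a 4-element single-precision FMAXV the
 * top-level call has vmap == 0b1111. ctpop8(vmap) / 2 == 2, so
 * vmap_lo == 0b0011 and vmap_hi == 0b1100, and the recursion evaluates
 * op(op(e0, e1), op(e2, e3)), i.e. the pairwise tree that the Reduce()
 * pseudocode requires for correct NaN propagation.
 */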
7066 /* AdvSIMD across lanes
7067 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7068 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7069 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7070 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7071 */
7072 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7073 {
7074 int rd = extract32(insn, 0, 5);
7075 int rn = extract32(insn, 5, 5);
7076 int size = extract32(insn, 22, 2);
7077 int opcode = extract32(insn, 12, 5);
7078 bool is_q = extract32(insn, 30, 1);
7079 bool is_u = extract32(insn, 29, 1);
7080 bool is_fp = false;
7081 bool is_min = false;
7082 int esize;
7083 int elements;
7084 int i;
7085 TCGv_i64 tcg_res, tcg_elt;
7086
7087 switch (opcode) {
7088 case 0x1b: /* ADDV */
7089 if (is_u) {
7090 unallocated_encoding(s);
7091 return;
7092 }
7093 /* fall through */
7094 case 0x3: /* SADDLV, UADDLV */
7095 case 0xa: /* SMAXV, UMAXV */
7096 case 0x1a: /* SMINV, UMINV */
7097 if (size == 3 || (size == 2 && !is_q)) {
7098 unallocated_encoding(s);
7099 return;
7100 }
7101 break;
7102 case 0xc: /* FMAXNMV, FMINNMV */
7103 case 0xf: /* FMAXV, FMINV */
7104 /* Bit 1 of size field encodes min vs max and the actual size
7105 * depends on the encoding of the U bit. If not set (and FP16
7106 * enabled) then we do half-precision float instead of single
7107 * precision.
7108 */
7109 is_min = extract32(size, 1, 1);
7110 is_fp = true;
7111 if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7112 size = 1;
7113 } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7114 unallocated_encoding(s);
7115 return;
7116 } else {
7117 size = 2;
7118 }
7119 break;
7120 default:
7121 unallocated_encoding(s);
7122 return;
7123 }
7124
7125 if (!fp_access_check(s)) {
7126 return;
7127 }
7128
7129 esize = 8 << size;
7130 elements = (is_q ? 128 : 64) / esize;
7131
7132 tcg_res = tcg_temp_new_i64();
7133 tcg_elt = tcg_temp_new_i64();
7134
7135 /* These instructions operate across all lanes of a vector
7136 * to produce a single result. We can guarantee that a 64
7137 * bit intermediate is sufficient:
7138 * + for [US]ADDLV the maximum element size is 32 bits, and
7139 * the result type is 64 bits
7140 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7141 * same as the element size, which is 32 bits at most
7142 * For the integer operations we can choose to work at 64
7143 * or 32 bits and truncate at the end; for simplicity
7144 * we use 64 bits always. The floating point
7145 * ops do require 32 bit intermediates, though.
7146 */
7147 if (!is_fp) {
7148 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7149
7150 for (i = 1; i < elements; i++) {
7151 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7152
7153 switch (opcode) {
7154 case 0x03: /* SADDLV / UADDLV */
7155 case 0x1b: /* ADDV */
7156 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7157 break;
7158 case 0x0a: /* SMAXV / UMAXV */
7159 if (is_u) {
7160 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7161 } else {
7162 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7163 }
7164 break;
7165 case 0x1a: /* SMINV / UMINV */
7166 if (is_u) {
7167 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7168 } else {
7169 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7170 }
7171 break;
7172 default:
7173 g_assert_not_reached();
7174 }
7175
7176 }
7177 } else {
7178 /* Floating point vector reduction ops which work across 32
7179 * bit (single) or 16 bit (half-precision) intermediates.
7180 * Note that correct NaN propagation requires that we do these
7181 * operations in exactly the order specified by the pseudocode.
7182 */
7183 TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
7184 int fpopcode = opcode | is_min << 4 | is_u << 5;
7185 int vmap = (1 << elements) - 1;
7186 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7187 (is_q ? 128 : 64), vmap, fpst);
7188 tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7189 tcg_temp_free_i32(tcg_res32);
7190 tcg_temp_free_ptr(fpst);
7191 }
7192
7193 tcg_temp_free_i64(tcg_elt);
7194
7195 /* Now truncate the result to the width required for the final output */
7196 if (opcode == 0x03) {
7197 /* SADDLV, UADDLV: result is 2*esize */
7198 size++;
7199 }
7200
7201 switch (size) {
7202 case 0:
7203 tcg_gen_ext8u_i64(tcg_res, tcg_res);
7204 break;
7205 case 1:
7206 tcg_gen_ext16u_i64(tcg_res, tcg_res);
7207 break;
7208 case 2:
7209 tcg_gen_ext32u_i64(tcg_res, tcg_res);
7210 break;
7211 case 3:
7212 break;
7213 default:
7214 g_assert_not_reached();
7215 }
7216
7217 write_fp_dreg(s, rd, tcg_res);
7218 tcg_temp_free_i64(tcg_res);
7219 }
7220
7221 /* DUP (Element, Vector)
7222 *
7223 * 31 30 29 21 20 16 15 10 9 5 4 0
7224 * +---+---+-------------------+--------+-------------+------+------+
7225 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7226 * +---+---+-------------------+--------+-------------+------+------+
7227 *
7228 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7229 */
7230 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7231 int imm5)
7232 {
7233 int size = ctz32(imm5);
7234 int index = imm5 >> (size + 1);
7235
7236 if (size > 3 || (size == 3 && !is_q)) {
7237 unallocated_encoding(s);
7238 return;
7239 }
7240
7241 if (!fp_access_check(s)) {
7242 return;
7243 }
7244
7245 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7246 vec_reg_offset(s, rn, index, size),
7247 is_q ? 16 : 8, vec_full_reg_size(s));
7248 }
7249
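/* Editorial worked example of the imm5 decode in handle_simd_dupe()
 * above: imm5 == 0b10110 has its lowest set bit at position 1, so
 * size == 1 (16-bit elements) and index == imm5 >> 2 == 5, i.e. a DUP
 * from Vn.H[5].
 */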
7250 /* DUP (element, scalar)
7251 * 31 21 20 16 15 10 9 5 4 0
7252 * +-----------------------+--------+-------------+------+------+
7253 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7254 * +-----------------------+--------+-------------+------+------+
7255 */
7256 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7257 int imm5)
7258 {
7259 int size = ctz32(imm5);
7260 int index;
7261 TCGv_i64 tmp;
7262
7263 if (size > 3) {
7264 unallocated_encoding(s);
7265 return;
7266 }
7267
7268 if (!fp_access_check(s)) {
7269 return;
7270 }
7271
7272 index = imm5 >> (size + 1);
7273
7274 /* This instruction just extracts the specified element and
7275 * zero-extends it into the bottom of the destination register.
7276 */
7277 tmp = tcg_temp_new_i64();
7278 read_vec_element(s, tmp, rn, index, size);
7279 write_fp_dreg(s, rd, tmp);
7280 tcg_temp_free_i64(tmp);
7281 }
7282
7283 /* DUP (General)
7284 *
7285 * 31 30 29 21 20 16 15 10 9 5 4 0
7286 * +---+---+-------------------+--------+-------------+------+------+
7287 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
7288 * +---+---+-------------------+--------+-------------+------+------+
7289 *
7290 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7291 */
7292 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7293 int imm5)
7294 {
7295 int size = ctz32(imm5);
7296 uint32_t dofs, oprsz, maxsz;
7297
7298 if (size > 3 || ((size == 3) && !is_q)) {
7299 unallocated_encoding(s);
7300 return;
7301 }
7302
7303 if (!fp_access_check(s)) {
7304 return;
7305 }
7306
7307 dofs = vec_full_reg_offset(s, rd);
7308 oprsz = is_q ? 16 : 8;
7309 maxsz = vec_full_reg_size(s);
7310
7311 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7312 }
7313
7314 /* INS (Element)
7315 *
7316 * 31 21 20 16 15 14 11 10 9 5 4 0
7317 * +-----------------------+--------+------------+---+------+------+
7318 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7319 * +-----------------------+--------+------------+---+------+------+
7320 *
7321 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7322 * index: encoded in imm5<4:size+1>
7323 */
7324 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7325 int imm4, int imm5)
7326 {
7327 int size = ctz32(imm5);
7328 int src_index, dst_index;
7329 TCGv_i64 tmp;
7330
7331 if (size > 3) {
7332 unallocated_encoding(s);
7333 return;
7334 }
7335
7336 if (!fp_access_check(s)) {
7337 return;
7338 }
7339
7340 dst_index = extract32(imm5, 1+size, 5);
7341 src_index = extract32(imm4, size, 4);
7342
7343 tmp = tcg_temp_new_i64();
7344
7345 read_vec_element(s, tmp, rn, src_index, size);
7346 write_vec_element(s, tmp, rd, dst_index, size);
7347
7348 tcg_temp_free_i64(tmp);
7349 }
7350
7351
7352 /* INS (General)
7353 *
7354 * 31 21 20 16 15 10 9 5 4 0
7355 * +-----------------------+--------+-------------+------+------+
7356 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
7357 * +-----------------------+--------+-------------+------+------+
7358 *
7359 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7360 * index: encoded in imm5<4:size+1>
7361 */
7362 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7363 {
7364 int size = ctz32(imm5);
7365 int idx;
7366
7367 if (size > 3) {
7368 unallocated_encoding(s);
7369 return;
7370 }
7371
7372 if (!fp_access_check(s)) {
7373 return;
7374 }
7375
7376 idx = extract32(imm5, 1 + size, 4 - size);
7377 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7378 }
7379
7380 /*
7381 * UMOV (General)
7382 * SMOV (General)
7383 *
7384 * 31 30 29 21 20 16 15 12 10 9 5 4 0
7385 * +---+---+-------------------+--------+-------------+------+------+
7386 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
7387 * +---+---+-------------------+--------+-------------+------+------+
7388 *
7389 * U: unsigned when set
7390 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7391 */
7392 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7393 int rn, int rd, int imm5)
7394 {
7395 int size = ctz32(imm5);
7396 int element;
7397 TCGv_i64 tcg_rd;
7398
7399 /* Check for UnallocatedEncodings */
7400 if (is_signed) {
7401 if (size > 2 || (size == 2 && !is_q)) {
7402 unallocated_encoding(s);
7403 return;
7404 }
7405 } else {
7406 if (size > 3
7407 || (size < 3 && is_q)
7408 || (size == 3 && !is_q)) {
7409 unallocated_encoding(s);
7410 return;
7411 }
7412 }
7413
7414 if (!fp_access_check(s)) {
7415 return;
7416 }
7417
7418 element = extract32(imm5, 1+size, 4);
7419
7420 tcg_rd = cpu_reg(s, rd);
7421 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7422 if (is_signed && !is_q) {
7423 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7424 }
7425 }
7426
7427 /* AdvSIMD copy
7428 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7429 * +---+---+----+-----------------+------+---+------+---+------+------+
7430 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7431 * +---+---+----+-----------------+------+---+------+---+------+------+
7432 */
7433 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7434 {
7435 int rd = extract32(insn, 0, 5);
7436 int rn = extract32(insn, 5, 5);
7437 int imm4 = extract32(insn, 11, 4);
7438 int op = extract32(insn, 29, 1);
7439 int is_q = extract32(insn, 30, 1);
7440 int imm5 = extract32(insn, 16, 5);
7441
7442 if (op) {
7443 if (is_q) {
7444 /* INS (element) */
7445 handle_simd_inse(s, rd, rn, imm4, imm5);
7446 } else {
7447 unallocated_encoding(s);
7448 }
7449 } else {
7450 switch (imm4) {
7451 case 0:
7452 /* DUP (element - vector) */
7453 handle_simd_dupe(s, is_q, rd, rn, imm5);
7454 break;
7455 case 1:
7456 /* DUP (general) */
7457 handle_simd_dupg(s, is_q, rd, rn, imm5);
7458 break;
7459 case 3:
7460 if (is_q) {
7461 /* INS (general) */
7462 handle_simd_insg(s, rd, rn, imm5);
7463 } else {
7464 unallocated_encoding(s);
7465 }
7466 break;
7467 case 5:
7468 case 7:
7469 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7470 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7471 break;
7472 default:
7473 unallocated_encoding(s);
7474 break;
7475 }
7476 }
7477 }
7478
7479 /* AdvSIMD modified immediate
7480 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
7481 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7482 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
7483 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7484 *
7485 * There are a number of operations that can be carried out here:
7486 * MOVI - move (shifted) imm into register
7487 * MVNI - move inverted (shifted) imm into register
7488 * ORR - bitwise OR of (shifted) imm with register
7489 * BIC - bitwise clear of (shifted) imm with register
7490 * With ARMv8.2 we also have:
7491 * FMOV half-precision
7492 */
7493 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7494 {
7495 int rd = extract32(insn, 0, 5);
7496 int cmode = extract32(insn, 12, 4);
7497 int cmode_3_1 = extract32(cmode, 1, 3);
7498 int cmode_0 = extract32(cmode, 0, 1);
7499 int o2 = extract32(insn, 11, 1);
7500 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7501 bool is_neg = extract32(insn, 29, 1);
7502 bool is_q = extract32(insn, 30, 1);
7503 uint64_t imm = 0;
7504
7505 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7506 /* Check for FMOV (vector, immediate) - half-precision */
7507 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7508 unallocated_encoding(s);
7509 return;
7510 }
7511 }
7512
7513 if (!fp_access_check(s)) {
7514 return;
7515 }
7516
7517 /* See AdvSIMDExpandImm() in ARM ARM */
7518 switch (cmode_3_1) {
7519 case 0: /* Replicate(Zeros(24):imm8, 2) */
7520 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
7521 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
7522 case 3: /* Replicate(imm8:Zeros(24), 2) */
7523 {
7524 int shift = cmode_3_1 * 8;
7525 imm = bitfield_replicate(abcdefgh << shift, 32);
7526 break;
7527 }
7528 case 4: /* Replicate(Zeros(8):imm8, 4) */
7529 case 5: /* Replicate(imm8:Zeros(8), 4) */
7530 {
7531 int shift = (cmode_3_1 & 0x1) * 8;
7532 imm = bitfield_replicate(abcdefgh << shift, 16);
7533 break;
7534 }
7535 case 6:
7536 if (cmode_0) {
7537 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
7538 imm = (abcdefgh << 16) | 0xffff;
7539 } else {
7540 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
7541 imm = (abcdefgh << 8) | 0xff;
7542 }
7543 imm = bitfield_replicate(imm, 32);
7544 break;
7545 case 7:
7546 if (!cmode_0 && !is_neg) {
7547 imm = bitfield_replicate(abcdefgh, 8);
7548 } else if (!cmode_0 && is_neg) {
7549 int i;
7550 imm = 0;
7551 for (i = 0; i < 8; i++) {
7552 if ((abcdefgh) & (1 << i)) {
7553 imm |= 0xffULL << (i * 8);
7554 }
7555 }
7556 } else if (cmode_0) {
7557 if (is_neg) {
7558 imm = (abcdefgh & 0x3f) << 48;
7559 if (abcdefgh & 0x80) {
7560 imm |= 0x8000000000000000ULL;
7561 }
7562 if (abcdefgh & 0x40) {
7563 imm |= 0x3fc0000000000000ULL;
7564 } else {
7565 imm |= 0x4000000000000000ULL;
7566 }
7567 } else {
7568 if (o2) {
7569 /* FMOV (vector, immediate) - half-precision */
7570 imm = vfp_expand_imm(MO_16, abcdefgh);
7571 /* now duplicate across the lanes */
7572 imm = bitfield_replicate(imm, 16);
7573 } else {
7574 imm = (abcdefgh & 0x3f) << 19;
7575 if (abcdefgh & 0x80) {
7576 imm |= 0x80000000;
7577 }
7578 if (abcdefgh & 0x40) {
7579 imm |= 0x3e000000;
7580 } else {
7581 imm |= 0x40000000;
7582 }
7583 imm |= (imm << 32);
7584 }
7585 }
7586 }
7587 break;
7588 default:
7589 fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
7590 g_assert_not_reached();
7591 }
7592
7593 if (cmode_3_1 != 7 && is_neg) {
7594 imm = ~imm;
7595 }
7596
7597 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7598 /* MOVI or MVNI, with MVNI negation handled above. */
7599 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7600 vec_full_reg_size(s), imm);
7601 } else {
7602 /* ORR or BIC, with BIC negation to AND handled above. */
7603 if (is_neg) {
7604 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7605 } else {
7606 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7607 }
7608 }
7609 }
7610
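/* Editorial worked example for the cmode_3_1 == 7, is_neg, !cmode_0
 * per-byte-mask case above: each bit of abcdefgh selects an all-ones
 * byte, so abcdefgh == 0b10000001 expands to 0xff000000000000ffULL
 * (the MOVI Vd.2D, #imm form, as we understand the encoding).
 */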
7611 /* AdvSIMD scalar copy
7612 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7613 * +-----+----+-----------------+------+---+------+---+------+------+
7614 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7615 * +-----+----+-----------------+------+---+------+---+------+------+
7616 */
7617 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7618 {
7619 int rd = extract32(insn, 0, 5);
7620 int rn = extract32(insn, 5, 5);
7621 int imm4 = extract32(insn, 11, 4);
7622 int imm5 = extract32(insn, 16, 5);
7623 int op = extract32(insn, 29, 1);
7624
7625 if (op != 0 || imm4 != 0) {
7626 unallocated_encoding(s);
7627 return;
7628 }
7629
7630 /* DUP (element, scalar) */
7631 handle_simd_dupes(s, rd, rn, imm5);
7632 }
7633
7634 /* AdvSIMD scalar pairwise
7635 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7636 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7637 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7638 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7639 */
7640 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7641 {
7642 int u = extract32(insn, 29, 1);
7643 int size = extract32(insn, 22, 2);
7644 int opcode = extract32(insn, 12, 5);
7645 int rn = extract32(insn, 5, 5);
7646 int rd = extract32(insn, 0, 5);
7647 TCGv_ptr fpst;
7648
7649 /* For some ops (the FP ones), size[1] is part of the encoding.
7650 * For ADDP strictly it is not, but size[1] is always 1 for valid
7651 * encodings.
7652 */
7653 opcode |= (extract32(size, 1, 1) << 5);
7654
7655 switch (opcode) {
7656 case 0x3b: /* ADDP */
7657 if (u || size != 3) {
7658 unallocated_encoding(s);
7659 return;
7660 }
7661 if (!fp_access_check(s)) {
7662 return;
7663 }
7664
7665 fpst = NULL;
7666 break;
7667 case 0xc: /* FMAXNMP */
7668 case 0xd: /* FADDP */
7669 case 0xf: /* FMAXP */
7670 case 0x2c: /* FMINNMP */
7671 case 0x2f: /* FMINP */
7672 /* FP op, size[0] is 32 or 64 bit */
7673 if (!u) {
7674 if (!dc_isar_feature(aa64_fp16, s)) {
7675 unallocated_encoding(s);
7676 return;
7677 } else {
7678 size = MO_16;
7679 }
7680 } else {
7681 size = extract32(size, 0, 1) ? MO_64 : MO_32;
7682 }
7683
7684 if (!fp_access_check(s)) {
7685 return;
7686 }
7687
7688 fpst = get_fpstatus_ptr(size == MO_16);
7689 break;
7690 default:
7691 unallocated_encoding(s);
7692 return;
7693 }
7694
7695 if (size == MO_64) {
7696 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7697 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7698 TCGv_i64 tcg_res = tcg_temp_new_i64();
7699
7700 read_vec_element(s, tcg_op1, rn, 0, MO_64);
7701 read_vec_element(s, tcg_op2, rn, 1, MO_64);
7702
7703 switch (opcode) {
7704 case 0x3b: /* ADDP */
7705 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7706 break;
7707 case 0xc: /* FMAXNMP */
7708 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7709 break;
7710 case 0xd: /* FADDP */
7711 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7712 break;
7713 case 0xf: /* FMAXP */
7714 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7715 break;
7716 case 0x2c: /* FMINNMP */
7717 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7718 break;
7719 case 0x2f: /* FMINP */
7720 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7721 break;
7722 default:
7723 g_assert_not_reached();
7724 }
7725
7726 write_fp_dreg(s, rd, tcg_res);
7727
7728 tcg_temp_free_i64(tcg_op1);
7729 tcg_temp_free_i64(tcg_op2);
7730 tcg_temp_free_i64(tcg_res);
7731 } else {
7732 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7733 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7734 TCGv_i32 tcg_res = tcg_temp_new_i32();
7735
7736 read_vec_element_i32(s, tcg_op1, rn, 0, size);
7737 read_vec_element_i32(s, tcg_op2, rn, 1, size);
7738
7739 if (size == MO_16) {
7740 switch (opcode) {
7741 case 0xc: /* FMAXNMP */
7742 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7743 break;
7744 case 0xd: /* FADDP */
7745 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7746 break;
7747 case 0xf: /* FMAXP */
7748 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7749 break;
7750 case 0x2c: /* FMINNMP */
7751 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7752 break;
7753 case 0x2f: /* FMINP */
7754 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7755 break;
7756 default:
7757 g_assert_not_reached();
7758 }
7759 } else {
7760 switch (opcode) {
7761 case 0xc: /* FMAXNMP */
7762 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7763 break;
7764 case 0xd: /* FADDP */
7765 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7766 break;
7767 case 0xf: /* FMAXP */
7768 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7769 break;
7770 case 0x2c: /* FMINNMP */
7771 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7772 break;
7773 case 0x2f: /* FMINP */
7774 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7775 break;
7776 default:
7777 g_assert_not_reached();
7778 }
7779 }
7780
7781 write_fp_sreg(s, rd, tcg_res);
7782
7783 tcg_temp_free_i32(tcg_op1);
7784 tcg_temp_free_i32(tcg_op2);
7785 tcg_temp_free_i32(tcg_res);
7786 }
7787
7788 if (fpst) {
7789 tcg_temp_free_ptr(fpst);
7790 }
7791 }
7792
7793 /*
7794 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7795 *
7796 * This code is handles the common shifting code and is used by both
7797 * the vector and scalar code.
7798 */
7799 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7800 TCGv_i64 tcg_rnd, bool accumulate,
7801 bool is_u, int size, int shift)
7802 {
7803 bool extended_result = false;
7804 bool round = tcg_rnd != NULL;
7805 int ext_lshift = 0;
7806 TCGv_i64 tcg_src_hi;
7807
7808 if (round && size == 3) {
7809 extended_result = true;
7810 ext_lshift = 64 - shift;
7811 tcg_src_hi = tcg_temp_new_i64();
7812 } else if (shift == 64) {
7813 if (!accumulate && is_u) {
7814 /* result is zero */
7815 tcg_gen_movi_i64(tcg_res, 0);
7816 return;
7817 }
7818 }
7819
7820 /* Deal with the rounding step */
7821 if (round) {
7822 if (extended_result) {
7823 TCGv_i64 tcg_zero = tcg_const_i64(0);
7824 if (!is_u) {
7825 /* take care of sign extending tcg_res */
7826 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7827 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7828 tcg_src, tcg_src_hi,
7829 tcg_rnd, tcg_zero);
7830 } else {
7831 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7832 tcg_src, tcg_zero,
7833 tcg_rnd, tcg_zero);
7834 }
7835 tcg_temp_free_i64(tcg_zero);
7836 } else {
7837 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7838 }
7839 }
7840
7841 /* Now do the shift right */
7842 if (round && extended_result) {
7843 /* extended case, >64 bit precision required */
7844 if (ext_lshift == 0) {
7845 /* special case, only high bits matter */
7846 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7847 } else {
7848 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7849 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7850 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7851 }
7852 } else {
7853 if (is_u) {
7854 if (shift == 64) {
7855 /* essentially shifting in 64 zeros */
7856 tcg_gen_movi_i64(tcg_src, 0);
7857 } else {
7858 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7859 }
7860 } else {
7861 if (shift == 64) {
7862 /* effectively extending the sign-bit */
7863 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7864 } else {
7865 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7866 }
7867 }
7868 }
7869
7870 if (accumulate) {
7871 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7872 } else {
7873 tcg_gen_mov_i64(tcg_res, tcg_src);
7874 }
7875
7876 if (extended_result) {
7877 tcg_temp_free_i64(tcg_src_hi);
7878 }
7879 }
7880
7881 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7882 static void handle_scalar_simd_shri(DisasContext *s,
7883 bool is_u, int immh, int immb,
7884 int opcode, int rn, int rd)
7885 {
7886 const int size = 3;
7887 int immhb = immh << 3 | immb;
7888 int shift = 2 * (8 << size) - immhb;
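/* With size == 3 this gives shift = 128 - immhb, so immh:immb values
 * 64..127 (i.e. immh bit 3 set) encode right shifts of 64 down to 1;
 * e.g. immh = 0b1000, immb = 0b000 yields shift = 64.
 */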
7889 bool accumulate = false;
7890 bool round = false;
7891 bool insert = false;
7892 TCGv_i64 tcg_rn;
7893 TCGv_i64 tcg_rd;
7894 TCGv_i64 tcg_round;
7895
7896 if (!extract32(immh, 3, 1)) {
7897 unallocated_encoding(s);
7898 return;
7899 }
7900
7901 if (!fp_access_check(s)) {
7902 return;
7903 }
7904
7905 switch (opcode) {
7906 case 0x02: /* SSRA / USRA (accumulate) */
7907 accumulate = true;
7908 break;
7909 case 0x04: /* SRSHR / URSHR (rounding) */
7910 round = true;
7911 break;
7912 case 0x06: /* SRSRA / URSRA (accum + rounding) */
7913 accumulate = round = true;
7914 break;
7915 case 0x08: /* SRI */
7916 insert = true;
7917 break;
7918 }
7919
7920 if (round) {
7921 uint64_t round_const = 1ULL << (shift - 1);
7922 tcg_round = tcg_const_i64(round_const);
7923 } else {
7924 tcg_round = NULL;
7925 }
7926
7927 tcg_rn = read_fp_dreg(s, rn);
7928 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7929
7930 if (insert) {
7931 /* shift count same as element size is valid but does nothing;
7932 * special case to avoid potential shift by 64.
7933 */
7934 int esize = 8 << size;
7935 if (shift != esize) {
7936 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7937 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7938 }
7939 } else {
7940 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7941 accumulate, is_u, size, shift);
7942 }
7943
7944 write_fp_dreg(s, rd, tcg_rd);
7945
7946 tcg_temp_free_i64(tcg_rn);
7947 tcg_temp_free_i64(tcg_rd);
7948 if (round) {
7949 tcg_temp_free_i64(tcg_round);
7950 }
7951 }
7952
7953 /* SHL/SLI - Scalar shift left */
7954 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7955 int immh, int immb, int opcode,
7956 int rn, int rd)
7957 {
7958 int size = 32 - clz32(immh) - 1;
7959 int immhb = immh << 3 | immb;
7960 int shift = immhb - (8 << size);
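/* The element size comes from the highest set bit of immh:
 * e.g. immh = 0b0001 gives size = 0 (bytes), and immhb = 8..15
 * then yields left shifts of 0..7 within the element.
 */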
7961 TCGv_i64 tcg_rn = new_tmp_a64(s);
7962 TCGv_i64 tcg_rd = new_tmp_a64(s);
7963
7964 if (!extract32(immh, 3, 1)) {
7965 unallocated_encoding(s);
7966 return;
7967 }
7968
7969 if (!fp_access_check(s)) {
7970 return;
7971 }
7972
7973 tcg_rn = read_fp_dreg(s, rn);
7974 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7975
7976 if (insert) {
7977 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
7978 } else {
7979 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
7980 }
7981
7982 write_fp_dreg(s, rd, tcg_rd);
7983
7984 tcg_temp_free_i64(tcg_rn);
7985 tcg_temp_free_i64(tcg_rd);
7986 }
7987
7988 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
7989 * (signed/unsigned) narrowing */
7990 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
7991 bool is_u_shift, bool is_u_narrow,
7992 int immh, int immb, int opcode,
7993 int rn, int rd)
7994 {
7995 int immhb = immh << 3 | immb;
7996 int size = 32 - clz32(immh) - 1;
7997 int esize = 8 << size;
7998 int shift = (2 * esize) - immhb;
7999 int elements = is_scalar ? 1 : (64 / esize);
8000 bool round = extract32(opcode, 0, 1);
8001 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8002 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8003 TCGv_i32 tcg_rd_narrowed;
8004 TCGv_i64 tcg_final;
8005
8006 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8007 { gen_helper_neon_narrow_sat_s8,
8008 gen_helper_neon_unarrow_sat8 },
8009 { gen_helper_neon_narrow_sat_s16,
8010 gen_helper_neon_unarrow_sat16 },
8011 { gen_helper_neon_narrow_sat_s32,
8012 gen_helper_neon_unarrow_sat32 },
8013 { NULL, NULL },
8014 };
8015 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8016 gen_helper_neon_narrow_sat_u8,
8017 gen_helper_neon_narrow_sat_u16,
8018 gen_helper_neon_narrow_sat_u32,
8019 NULL
8020 };
8021 NeonGenNarrowEnvFn *narrowfn;
8022
8023 int i;
8024
8025 assert(size < 4);
8026
8027 if (extract32(immh, 3, 1)) {
8028 unallocated_encoding(s);
8029 return;
8030 }
8031
8032 if (!fp_access_check(s)) {
8033 return;
8034 }
8035
8036 if (is_u_shift) {
8037 narrowfn = unsigned_narrow_fns[size];
8038 } else {
8039 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8040 }
8041
8042 tcg_rn = tcg_temp_new_i64();
8043 tcg_rd = tcg_temp_new_i64();
8044 tcg_rd_narrowed = tcg_temp_new_i32();
8045 tcg_final = tcg_const_i64(0);
8046
8047 if (round) {
8048 uint64_t round_const = 1ULL << (shift - 1);
8049 tcg_round = tcg_const_i64(round_const);
8050 } else {
8051 tcg_round = NULL;
8052 }
8053
8054 for (i = 0; i < elements; i++) {
8055 read_vec_element(s, tcg_rn, rn, i, ldop);
8056 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8057 false, is_u_shift, size+1, shift);
8058 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8059 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8060 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8061 }
8062
8063 if (!is_q) {
8064 write_vec_element(s, tcg_final, rd, 0, MO_64);
8065 } else {
8066 write_vec_element(s, tcg_final, rd, 1, MO_64);
8067 }
8068
8069 if (round) {
8070 tcg_temp_free_i64(tcg_round);
8071 }
8072 tcg_temp_free_i64(tcg_rn);
8073 tcg_temp_free_i64(tcg_rd);
8074 tcg_temp_free_i32(tcg_rd_narrowed);
8075 tcg_temp_free_i64(tcg_final);
8076
8077 clear_vec_high(s, is_q, rd);
8078 }
8079
8080 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8081 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8082 bool src_unsigned, bool dst_unsigned,
8083 int immh, int immb, int rn, int rd)
8084 {
8085 int immhb = immh << 3 | immb;
8086 int size = 32 - clz32(immh) - 1;
8087 int shift = immhb - (8 << size);
8088 int pass;
8089
8090 assert(immh != 0);
8091 assert(!(scalar && is_q));
8092
8093 if (!scalar) {
8094 if (!is_q && extract32(immh, 3, 1)) {
8095 unallocated_encoding(s);
8096 return;
8097 }
8098
8099 /* Since we use the variable-shift helpers we must
8100 * replicate the shift count into each element of
8101 * the tcg_shift value.
8102 */
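/* e.g. size == 0 with shift == 3 becomes 0x03030303 after the
 * ORs below, so every 8-bit lane of the 32-bit helper operand
 * sees a count of 3.
 */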
8103 switch (size) {
8104 case 0:
8105 shift |= shift << 8;
8106 /* fall through */
8107 case 1:
8108 shift |= shift << 16;
8109 break;
8110 case 2:
8111 case 3:
8112 break;
8113 default:
8114 g_assert_not_reached();
8115 }
8116 }
8117
8118 if (!fp_access_check(s)) {
8119 return;
8120 }
8121
8122 if (size == 3) {
8123 TCGv_i64 tcg_shift = tcg_const_i64(shift);
8124 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8125 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8126 { NULL, gen_helper_neon_qshl_u64 },
8127 };
8128 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8129 int maxpass = is_q ? 2 : 1;
8130
8131 for (pass = 0; pass < maxpass; pass++) {
8132 TCGv_i64 tcg_op = tcg_temp_new_i64();
8133
8134 read_vec_element(s, tcg_op, rn, pass, MO_64);
8135 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8136 write_vec_element(s, tcg_op, rd, pass, MO_64);
8137
8138 tcg_temp_free_i64(tcg_op);
8139 }
8140 tcg_temp_free_i64(tcg_shift);
8141 clear_vec_high(s, is_q, rd);
8142 } else {
8143 TCGv_i32 tcg_shift = tcg_const_i32(shift);
8144 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8145 {
8146 { gen_helper_neon_qshl_s8,
8147 gen_helper_neon_qshl_s16,
8148 gen_helper_neon_qshl_s32 },
8149 { gen_helper_neon_qshlu_s8,
8150 gen_helper_neon_qshlu_s16,
8151 gen_helper_neon_qshlu_s32 }
8152 }, {
8153 { NULL, NULL, NULL },
8154 { gen_helper_neon_qshl_u8,
8155 gen_helper_neon_qshl_u16,
8156 gen_helper_neon_qshl_u32 }
8157 }
8158 };
8159 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8160 MemOp memop = scalar ? size : MO_32;
8161 int maxpass = scalar ? 1 : is_q ? 4 : 2;
8162
8163 for (pass = 0; pass < maxpass; pass++) {
8164 TCGv_i32 tcg_op = tcg_temp_new_i32();
8165
8166 read_vec_element_i32(s, tcg_op, rn, pass, memop);
8167 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8168 if (scalar) {
8169 switch (size) {
8170 case 0:
8171 tcg_gen_ext8u_i32(tcg_op, tcg_op);
8172 break;
8173 case 1:
8174 tcg_gen_ext16u_i32(tcg_op, tcg_op);
8175 break;
8176 case 2:
8177 break;
8178 default:
8179 g_assert_not_reached();
8180 }
8181 write_fp_sreg(s, rd, tcg_op);
8182 } else {
8183 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8184 }
8185
8186 tcg_temp_free_i32(tcg_op);
8187 }
8188 tcg_temp_free_i32(tcg_shift);
8189
8190 if (!scalar) {
8191 clear_vec_high(s, is_q, rd);
8192 }
8193 }
8194 }
8195
8196 /* Common vector code for handling integer to FP conversion */
8197 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8198 int elements, int is_signed,
8199 int fracbits, int size)
8200 {
8201 TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
8202 TCGv_i32 tcg_shift = NULL;
8203
8204 MemOp mop = size | (is_signed ? MO_SIGN : 0);
8205 int pass;
8206
8207 if (fracbits || size == MO_64) {
8208 tcg_shift = tcg_const_i32(fracbits);
8209 }
8210
8211 if (size == MO_64) {
8212 TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8213 TCGv_i64 tcg_double = tcg_temp_new_i64();
8214
8215 for (pass = 0; pass < elements; pass++) {
8216 read_vec_element(s, tcg_int64, rn, pass, mop);
8217
8218 if (is_signed) {
8219 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8220 tcg_shift, tcg_fpst);
8221 } else {
8222 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8223 tcg_shift, tcg_fpst);
8224 }
8225 if (elements == 1) {
8226 write_fp_dreg(s, rd, tcg_double);
8227 } else {
8228 write_vec_element(s, tcg_double, rd, pass, MO_64);
8229 }
8230 }
8231
8232 tcg_temp_free_i64(tcg_int64);
8233 tcg_temp_free_i64(tcg_double);
8234
8235 } else {
8236 TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8237 TCGv_i32 tcg_float = tcg_temp_new_i32();
8238
8239 for (pass = 0; pass < elements; pass++) {
8240 read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8241
8242 switch (size) {
8243 case MO_32:
8244 if (fracbits) {
8245 if (is_signed) {
8246 gen_helper_vfp_sltos(tcg_float, tcg_int32,
8247 tcg_shift, tcg_fpst);
8248 } else {
8249 gen_helper_vfp_ultos(tcg_float, tcg_int32,
8250 tcg_shift, tcg_fpst);
8251 }
8252 } else {
8253 if (is_signed) {
8254 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8255 } else {
8256 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8257 }
8258 }
8259 break;
8260 case MO_16:
8261 if (fracbits) {
8262 if (is_signed) {
8263 gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8264 tcg_shift, tcg_fpst);
8265 } else {
8266 gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8267 tcg_shift, tcg_fpst);
8268 }
8269 } else {
8270 if (is_signed) {
8271 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8272 } else {
8273 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8274 }
8275 }
8276 break;
8277 default:
8278 g_assert_not_reached();
8279 }
8280
8281 if (elements == 1) {
8282 write_fp_sreg(s, rd, tcg_float);
8283 } else {
8284 write_vec_element_i32(s, tcg_float, rd, pass, size);
8285 }
8286 }
8287
8288 tcg_temp_free_i32(tcg_int32);
8289 tcg_temp_free_i32(tcg_float);
8290 }
8291
8292 tcg_temp_free_ptr(tcg_fpst);
8293 if (tcg_shift) {
8294 tcg_temp_free_i32(tcg_shift);
8295 }
8296
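/* << binds tighter than == in C, so this tests
 * (elements << size) == 16, i.e. whether the operation wrote a
 * full 128-bit vector (elements * bytes-per-element == 16).
 */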
8297 clear_vec_high(s, elements << size == 16, rd);
8298 }
8299
8300 /* UCVTF/SCVTF - Integer to FP conversion */
8301 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8302 bool is_q, bool is_u,
8303 int immh, int immb, int opcode,
8304 int rn, int rd)
8305 {
8306 int size, elements, fracbits;
8307 int immhb = immh << 3 | immb;
8308
8309 if (immh & 8) {
8310 size = MO_64;
8311 if (!is_scalar && !is_q) {
8312 unallocated_encoding(s);
8313 return;
8314 }
8315 } else if (immh & 4) {
8316 size = MO_32;
8317 } else if (immh & 2) {
8318 size = MO_16;
8319 if (!dc_isar_feature(aa64_fp16, s)) {
8320 unallocated_encoding(s);
8321 return;
8322 }
8323 } else {
8324 /* immh == 0 would be a failure of the decode logic */
8325 g_assert(immh == 1);
8326 unallocated_encoding(s);
8327 return;
8328 }
8329
8330 if (is_scalar) {
8331 elements = 1;
8332 } else {
8333 elements = (8 << is_q) >> size;
8334 }
8335 fracbits = (16 << size) - immhb;
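/* e.g. a full-width (is_q) MO_16 op has elements = 16 >> 1 = 8;
 * for MO_32 the decode gives fracbits = 64 - immhb, i.e. 1..32
 * fractional bits when immh = 01xx.
 */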
8336
8337 if (!fp_access_check(s)) {
8338 return;
8339 }
8340
8341 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8342 }
8343
8344 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8345 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8346 bool is_q, bool is_u,
8347 int immh, int immb, int rn, int rd)
8348 {
8349 int immhb = immh << 3 | immb;
8350 int pass, size, fracbits;
8351 TCGv_ptr tcg_fpstatus;
8352 TCGv_i32 tcg_rmode, tcg_shift;
8353
8354 if (immh & 0x8) {
8355 size = MO_64;
8356 if (!is_scalar && !is_q) {
8357 unallocated_encoding(s);
8358 return;
8359 }
8360 } else if (immh & 0x4) {
8361 size = MO_32;
8362 } else if (immh & 0x2) {
8363 size = MO_16;
8364 if (!dc_isar_feature(aa64_fp16, s)) {
8365 unallocated_encoding(s);
8366 return;
8367 }
8368 } else {
8369 /* Should have split out AdvSIMD modified immediate earlier. */
8370 assert(immh == 1);
8371 unallocated_encoding(s);
8372 return;
8373 }
8374
8375 if (!fp_access_check(s)) {
8376 return;
8377 }
8378
8379 assert(!(is_scalar && is_q));
8380
8381 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8382 tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
8383 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8384 fracbits = (16 << size) - immhb;
8385 tcg_shift = tcg_const_i32(fracbits);
8386
8387 if (size == MO_64) {
8388 int maxpass = is_scalar ? 1 : 2;
8389
8390 for (pass = 0; pass < maxpass; pass++) {
8391 TCGv_i64 tcg_op = tcg_temp_new_i64();
8392
8393 read_vec_element(s, tcg_op, rn, pass, MO_64);
8394 if (is_u) {
8395 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8396 } else {
8397 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8398 }
8399 write_vec_element(s, tcg_op, rd, pass, MO_64);
8400 tcg_temp_free_i64(tcg_op);
8401 }
8402 clear_vec_high(s, is_q, rd);
8403 } else {
8404 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8405 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8406
8407 switch (size) {
8408 case MO_16:
8409 if (is_u) {
8410 fn = gen_helper_vfp_touhh;
8411 } else {
8412 fn = gen_helper_vfp_toshh;
8413 }
8414 break;
8415 case MO_32:
8416 if (is_u) {
8417 fn = gen_helper_vfp_touls;
8418 } else {
8419 fn = gen_helper_vfp_tosls;
8420 }
8421 break;
8422 default:
8423 g_assert_not_reached();
8424 }
8425
8426 for (pass = 0; pass < maxpass; pass++) {
8427 TCGv_i32 tcg_op = tcg_temp_new_i32();
8428
8429 read_vec_element_i32(s, tcg_op, rn, pass, size);
8430 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8431 if (is_scalar) {
8432 write_fp_sreg(s, rd, tcg_op);
8433 } else {
8434 write_vec_element_i32(s, tcg_op, rd, pass, size);
8435 }
8436 tcg_temp_free_i32(tcg_op);
8437 }
8438 if (!is_scalar) {
8439 clear_vec_high(s, is_q, rd);
8440 }
8441 }
8442
8443 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8444 tcg_temp_free_i32(tcg_rmode);
8445 tcg_temp_free_ptr(tcg_fpstatus);
8446 tcg_temp_free_i32(tcg_shift);
8447 }
8448
8449 /* AdvSIMD scalar shift by immediate
8450 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8451 * +-----+---+-------------+------+------+--------+---+------+------+
8452 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8453 * +-----+---+-------------+------+------+--------+---+------+------+
8454 *
8455 * This is the scalar version so it works on fixed size registers
8456 */
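/* For instance USHR d0, d1, #64 has U = 1, immh = 0b1000,
 * immb = 0b000 (immhb = 64, so shift = 128 - 64 = 64) and
 * opcode = 0b00000.
 */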
8457 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8458 {
8459 int rd = extract32(insn, 0, 5);
8460 int rn = extract32(insn, 5, 5);
8461 int opcode = extract32(insn, 11, 5);
8462 int immb = extract32(insn, 16, 3);
8463 int immh = extract32(insn, 19, 4);
8464 bool is_u = extract32(insn, 29, 1);
8465
8466 if (immh == 0) {
8467 unallocated_encoding(s);
8468 return;
8469 }
8470
8471 switch (opcode) {
8472 case 0x08: /* SRI */
8473 if (!is_u) {
8474 unallocated_encoding(s);
8475 return;
8476 }
8477 /* fall through */
8478 case 0x00: /* SSHR / USHR */
8479 case 0x02: /* SSRA / USRA */
8480 case 0x04: /* SRSHR / URSHR */
8481 case 0x06: /* SRSRA / URSRA */
8482 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8483 break;
8484 case 0x0a: /* SHL / SLI */
8485 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8486 break;
8487 case 0x1c: /* SCVTF, UCVTF */
8488 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8489 opcode, rn, rd);
8490 break;
8491 case 0x10: /* SQSHRUN, SQSHRUN2 */
8492 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8493 if (!is_u) {
8494 unallocated_encoding(s);
8495 return;
8496 }
8497 handle_vec_simd_sqshrn(s, true, false, false, true,
8498 immh, immb, opcode, rn, rd);
8499 break;
8500 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8501 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8502 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8503 immh, immb, opcode, rn, rd);
8504 break;
8505 case 0xc: /* SQSHLU */
8506 if (!is_u) {
8507 unallocated_encoding(s);
8508 return;
8509 }
8510 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8511 break;
8512 case 0xe: /* SQSHL, UQSHL */
8513 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8514 break;
8515 case 0x1f: /* FCVTZS, FCVTZU */
8516 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8517 break;
8518 default:
8519 unallocated_encoding(s);
8520 break;
8521 }
8522 }
8523
8524 /* AdvSIMD scalar three different
8525 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8526 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8527 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8528 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8529 */
8530 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8531 {
8532 bool is_u = extract32(insn, 29, 1);
8533 int size = extract32(insn, 22, 2);
8534 int opcode = extract32(insn, 12, 4);
8535 int rm = extract32(insn, 16, 5);
8536 int rn = extract32(insn, 5, 5);
8537 int rd = extract32(insn, 0, 5);
8538
8539 if (is_u) {
8540 unallocated_encoding(s);
8541 return;
8542 }
8543
8544 switch (opcode) {
8545 case 0x9: /* SQDMLAL, SQDMLAL2 */
8546 case 0xb: /* SQDMLSL, SQDMLSL2 */
8547 case 0xd: /* SQDMULL, SQDMULL2 */
8548 if (size == 0 || size == 3) {
8549 unallocated_encoding(s);
8550 return;
8551 }
8552 break;
8553 default:
8554 unallocated_encoding(s);
8555 return;
8556 }
8557
8558 if (!fp_access_check(s)) {
8559 return;
8560 }
8561
8562 if (size == 2) {
8563 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8564 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8565 TCGv_i64 tcg_res = tcg_temp_new_i64();
8566
8567 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8568 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8569
8570 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8571 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8572
8573 switch (opcode) {
8574 case 0xd: /* SQDMULL, SQDMULL2 */
8575 break;
8576 case 0xb: /* SQDMLSL, SQDMLSL2 */
8577 tcg_gen_neg_i64(tcg_res, tcg_res);
8578 /* fall through */
8579 case 0x9: /* SQDMLAL, SQDMLAL2 */
8580 read_vec_element(s, tcg_op1, rd, 0, MO_64);
8581 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8582 tcg_res, tcg_op1);
8583 break;
8584 default:
8585 g_assert_not_reached();
8586 }
8587
8588 write_fp_dreg(s, rd, tcg_res);
8589
8590 tcg_temp_free_i64(tcg_op1);
8591 tcg_temp_free_i64(tcg_op2);
8592 tcg_temp_free_i64(tcg_res);
8593 } else {
8594 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8595 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8596 TCGv_i64 tcg_res = tcg_temp_new_i64();
8597
8598 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8599 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8600
8601 switch (opcode) {
8602 case 0xd: /* SQDMULL, SQDMULL2 */
8603 break;
8604 case 0xb: /* SQDMLSL, SQDMLSL2 */
8605 gen_helper_neon_negl_u32(tcg_res, tcg_res);
8606 /* fall through */
8607 case 0x9: /* SQDMLAL, SQDMLAL2 */
8608 {
8609 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8610 read_vec_element(s, tcg_op3, rd, 0, MO_32);
8611 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8612 tcg_res, tcg_op3);
8613 tcg_temp_free_i64(tcg_op3);
8614 break;
8615 }
8616 default:
8617 g_assert_not_reached();
8618 }
8619
8620 tcg_gen_ext32u_i64(tcg_res, tcg_res);
8621 write_fp_dreg(s, rd, tcg_res);
8622
8623 tcg_temp_free_i32(tcg_op1);
8624 tcg_temp_free_i32(tcg_op2);
8625 tcg_temp_free_i64(tcg_res);
8626 }
8627 }
8628
8629 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8630 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8631 {
8632 /* Handle 64x64->64 opcodes which are shared between the scalar
8633 * and vector 3-same groups. We cover every opcode where size == 3
8634 * is valid in either the three-reg-same (integer, not pairwise)
8635 * or scalar-three-reg-same groups.
8636 */
8637 TCGCond cond;
8638
8639 switch (opcode) {
8640 case 0x1: /* SQADD */
8641 if (u) {
8642 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8643 } else {
8644 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8645 }
8646 break;
8647 case 0x5: /* SQSUB */
8648 if (u) {
8649 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8650 } else {
8651 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8652 }
8653 break;
8654 case 0x6: /* CMGT, CMHI */
8655 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8656 * We implement this using setcond (test) and then negating.
8657 */
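/* setcond leaves 1 in tcg_rd when the test passes; negating
 * turns that into the required all-ones (2^64 - 1) result.
 */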
8658 cond = u ? TCG_COND_GTU : TCG_COND_GT;
8659 do_cmop:
8660 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8661 tcg_gen_neg_i64(tcg_rd, tcg_rd);
8662 break;
8663 case 0x7: /* CMGE, CMHS */
8664 cond = u ? TCG_COND_GEU : TCG_COND_GE;
8665 goto do_cmop;
8666 case 0x11: /* CMTST, CMEQ */
8667 if (u) {
8668 cond = TCG_COND_EQ;
8669 goto do_cmop;
8670 }
8671 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8672 break;
8673 case 0x8: /* SSHL, USHL */
8674 if (u) {
8675 gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8676 } else {
8677 gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8678 }
8679 break;
8680 case 0x9: /* SQSHL, UQSHL */
8681 if (u) {
8682 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8683 } else {
8684 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8685 }
8686 break;
8687 case 0xa: /* SRSHL, URSHL */
8688 if (u) {
8689 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8690 } else {
8691 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8692 }
8693 break;
8694 case 0xb: /* SQRSHL, UQRSHL */
8695 if (u) {
8696 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8697 } else {
8698 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8699 }
8700 break;
8701 case 0x10: /* ADD, SUB */
8702 if (u) {
8703 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8704 } else {
8705 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8706 }
8707 break;
8708 default:
8709 g_assert_not_reached();
8710 }
8711 }
8712
8713 /* Handle the 3-same-operands float operations; shared by the scalar
8714 * and vector encodings. The caller must filter out any encodings
8715 * not allocated for the encoding it is dealing with.
8716 */
8717 static void handle_3same_float(DisasContext *s, int size, int elements,
8718 int fpopcode, int rd, int rn, int rm)
8719 {
8720 int pass;
8721 TCGv_ptr fpst = get_fpstatus_ptr(false);
8722
8723 for (pass = 0; pass < elements; pass++) {
8724 if (size) {
8725 /* Double */
8726 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8727 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8728 TCGv_i64 tcg_res = tcg_temp_new_i64();
8729
8730 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8731 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8732
8733 switch (fpopcode) {
8734 case 0x39: /* FMLS */
8735 /* As usual for ARM, separate negation for fused multiply-add */
8736 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8737 /* fall through */
8738 case 0x19: /* FMLA */
8739 read_vec_element(s, tcg_res, rd, pass, MO_64);
8740 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8741 tcg_res, fpst);
8742 break;
8743 case 0x18: /* FMAXNM */
8744 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8745 break;
8746 case 0x1a: /* FADD */
8747 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8748 break;
8749 case 0x1b: /* FMULX */
8750 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8751 break;
8752 case 0x1c: /* FCMEQ */
8753 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8754 break;
8755 case 0x1e: /* FMAX */
8756 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8757 break;
8758 case 0x1f: /* FRECPS */
8759 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8760 break;
8761 case 0x38: /* FMINNM */
8762 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8763 break;
8764 case 0x3a: /* FSUB */
8765 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8766 break;
8767 case 0x3e: /* FMIN */
8768 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8769 break;
8770 case 0x3f: /* FRSQRTS */
8771 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8772 break;
8773 case 0x5b: /* FMUL */
8774 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8775 break;
8776 case 0x5c: /* FCMGE */
8777 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8778 break;
8779 case 0x5d: /* FACGE */
8780 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8781 break;
8782 case 0x5f: /* FDIV */
8783 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8784 break;
8785 case 0x7a: /* FABD */
8786 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8787 gen_helper_vfp_absd(tcg_res, tcg_res);
8788 break;
8789 case 0x7c: /* FCMGT */
8790 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8791 break;
8792 case 0x7d: /* FACGT */
8793 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8794 break;
8795 default:
8796 g_assert_not_reached();
8797 }
8798
8799 write_vec_element(s, tcg_res, rd, pass, MO_64);
8800
8801 tcg_temp_free_i64(tcg_res);
8802 tcg_temp_free_i64(tcg_op1);
8803 tcg_temp_free_i64(tcg_op2);
8804 } else {
8805 /* Single */
8806 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8807 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8808 TCGv_i32 tcg_res = tcg_temp_new_i32();
8809
8810 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8811 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8812
8813 switch (fpopcode) {
8814 case 0x39: /* FMLS */
8815 /* As usual for ARM, separate negation for fused multiply-add */
8816 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8817 /* fall through */
8818 case 0x19: /* FMLA */
8819 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8820 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8821 tcg_res, fpst);
8822 break;
8823 case 0x1a: /* FADD */
8824 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8825 break;
8826 case 0x1b: /* FMULX */
8827 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8828 break;
8829 case 0x1c: /* FCMEQ */
8830 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8831 break;
8832 case 0x1e: /* FMAX */
8833 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8834 break;
8835 case 0x1f: /* FRECPS */
8836 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8837 break;
8838 case 0x18: /* FMAXNM */
8839 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8840 break;
8841 case 0x38: /* FMINNM */
8842 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8843 break;
8844 case 0x3a: /* FSUB */
8845 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8846 break;
8847 case 0x3e: /* FMIN */
8848 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8849 break;
8850 case 0x3f: /* FRSQRTS */
8851 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8852 break;
8853 case 0x5b: /* FMUL */
8854 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8855 break;
8856 case 0x5c: /* FCMGE */
8857 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8858 break;
8859 case 0x5d: /* FACGE */
8860 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8861 break;
8862 case 0x5f: /* FDIV */
8863 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8864 break;
8865 case 0x7a: /* FABD */
8866 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8867 gen_helper_vfp_abss(tcg_res, tcg_res);
8868 break;
8869 case 0x7c: /* FCMGT */
8870 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8871 break;
8872 case 0x7d: /* FACGT */
8873 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8874 break;
8875 default:
8876 g_assert_not_reached();
8877 }
8878
8879 if (elements == 1) {
8880 /* scalar single so clear high part */
8881 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8882
8883 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8884 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8885 tcg_temp_free_i64(tcg_tmp);
8886 } else {
8887 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8888 }
8889
8890 tcg_temp_free_i32(tcg_res);
8891 tcg_temp_free_i32(tcg_op1);
8892 tcg_temp_free_i32(tcg_op2);
8893 }
8894 }
8895
8896 tcg_temp_free_ptr(fpst);
8897
8898 clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8899 }
8900
8901 /* AdvSIMD scalar three same
8902 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
8903 * +-----+---+-----------+------+---+------+--------+---+------+------+
8904 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
8905 * +-----+---+-----------+------+---+------+--------+---+------+------+
8906 */
8907 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8908 {
8909 int rd = extract32(insn, 0, 5);
8910 int rn = extract32(insn, 5, 5);
8911 int opcode = extract32(insn, 11, 5);
8912 int rm = extract32(insn, 16, 5);
8913 int size = extract32(insn, 22, 2);
8914 bool u = extract32(insn, 29, 1);
8915 TCGv_i64 tcg_rd;
8916
8917 if (opcode >= 0x18) {
8918 /* Floating point: U, size[1] and opcode indicate operation */
8919 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
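/* e.g. FABD is opcode 0x1a with size[1] = 1 and U = 1, giving
 * fpopcode = 0x1a | 0x20 | 0x40 = 0x7a as matched below.
 */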
8920 switch (fpopcode) {
8921 case 0x1b: /* FMULX */
8922 case 0x1f: /* FRECPS */
8923 case 0x3f: /* FRSQRTS */
8924 case 0x5d: /* FACGE */
8925 case 0x7d: /* FACGT */
8926 case 0x1c: /* FCMEQ */
8927 case 0x5c: /* FCMGE */
8928 case 0x7c: /* FCMGT */
8929 case 0x7a: /* FABD */
8930 break;
8931 default:
8932 unallocated_encoding(s);
8933 return;
8934 }
8935
8936 if (!fp_access_check(s)) {
8937 return;
8938 }
8939
8940 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8941 return;
8942 }
8943
8944 switch (opcode) {
8945 case 0x1: /* SQADD, UQADD */
8946 case 0x5: /* SQSUB, UQSUB */
8947 case 0x9: /* SQSHL, UQSHL */
8948 case 0xb: /* SQRSHL, UQRSHL */
8949 break;
8950 case 0x8: /* SSHL, USHL */
8951 case 0xa: /* SRSHL, URSHL */
8952 case 0x6: /* CMGT, CMHI */
8953 case 0x7: /* CMGE, CMHS */
8954 case 0x11: /* CMTST, CMEQ */
8955 case 0x10: /* ADD, SUB (vector) */
8956 if (size != 3) {
8957 unallocated_encoding(s);
8958 return;
8959 }
8960 break;
8961 case 0x16: /* SQDMULH, SQRDMULH (vector) */
8962 if (size != 1 && size != 2) {
8963 unallocated_encoding(s);
8964 return;
8965 }
8966 break;
8967 default:
8968 unallocated_encoding(s);
8969 return;
8970 }
8971
8972 if (!fp_access_check(s)) {
8973 return;
8974 }
8975
8976 tcg_rd = tcg_temp_new_i64();
8977
8978 if (size == 3) {
8979 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8980 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
8981
8982 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
8983 tcg_temp_free_i64(tcg_rn);
8984 tcg_temp_free_i64(tcg_rm);
8985 } else {
8986 /* Do a single operation on the lowest element in the vector.
8987 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
8988 * no side effects for all these operations.
8989 * OPTME: special-purpose helpers would avoid doing some
8990 * unnecessary work in the helper for the 8 and 16 bit cases.
8991 */
8992 NeonGenTwoOpEnvFn *genenvfn;
8993 TCGv_i32 tcg_rn = tcg_temp_new_i32();
8994 TCGv_i32 tcg_rm = tcg_temp_new_i32();
8995 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
8996
8997 read_vec_element_i32(s, tcg_rn, rn, 0, size);
8998 read_vec_element_i32(s, tcg_rm, rm, 0, size);
8999
9000 switch (opcode) {
9001 case 0x1: /* SQADD, UQADD */
9002 {
9003 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9004 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9005 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9006 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9007 };
9008 genenvfn = fns[size][u];
9009 break;
9010 }
9011 case 0x5: /* SQSUB, UQSUB */
9012 {
9013 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9014 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9015 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9016 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9017 };
9018 genenvfn = fns[size][u];
9019 break;
9020 }
9021 case 0x9: /* SQSHL, UQSHL */
9022 {
9023 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9024 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9025 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9026 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9027 };
9028 genenvfn = fns[size][u];
9029 break;
9030 }
9031 case 0xb: /* SQRSHL, UQRSHL */
9032 {
9033 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9034 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9035 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9036 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9037 };
9038 genenvfn = fns[size][u];
9039 break;
9040 }
9041 case 0x16: /* SQDMULH, SQRDMULH */
9042 {
9043 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9044 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9045 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9046 };
9047 assert(size == 1 || size == 2);
9048 genenvfn = fns[size - 1][u];
9049 break;
9050 }
9051 default:
9052 g_assert_not_reached();
9053 }
9054
9055 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9056 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9057 tcg_temp_free_i32(tcg_rd32);
9058 tcg_temp_free_i32(tcg_rn);
9059 tcg_temp_free_i32(tcg_rm);
9060 }
9061
9062 write_fp_dreg(s, rd, tcg_rd);
9063
9064 tcg_temp_free_i64(tcg_rd);
9065 }
9066
9067 /* AdvSIMD scalar three same FP16
9068 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
9069 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9070 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
9071 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9072 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9073 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9074 */
9075 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9076 uint32_t insn)
9077 {
9078 int rd = extract32(insn, 0, 5);
9079 int rn = extract32(insn, 5, 5);
9080 int opcode = extract32(insn, 11, 3);
9081 int rm = extract32(insn, 16, 5);
9082 bool u = extract32(insn, 29, 1);
9083 bool a = extract32(insn, 23, 1);
9084 int fpopcode = opcode | (a << 3) | (u << 4);
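/* e.g. FACGT has opcode = 0b101, a = 1, U = 1, giving
 * fpopcode = 0x5 | 0x8 | 0x10 = 0x1d as matched below.
 */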
9085 TCGv_ptr fpst;
9086 TCGv_i32 tcg_op1;
9087 TCGv_i32 tcg_op2;
9088 TCGv_i32 tcg_res;
9089
9090 switch (fpopcode) {
9091 case 0x03: /* FMULX */
9092 case 0x04: /* FCMEQ (reg) */
9093 case 0x07: /* FRECPS */
9094 case 0x0f: /* FRSQRTS */
9095 case 0x14: /* FCMGE (reg) */
9096 case 0x15: /* FACGE */
9097 case 0x1a: /* FABD */
9098 case 0x1c: /* FCMGT (reg) */
9099 case 0x1d: /* FACGT */
9100 break;
9101 default:
9102 unallocated_encoding(s);
9103 return;
9104 }
9105
9106 if (!dc_isar_feature(aa64_fp16, s)) {
9107 unallocated_encoding(s);
9108 return;
9109 }
9109
9110 if (!fp_access_check(s)) {
9111 return;
9112 }
9113
9114 fpst = get_fpstatus_ptr(true);
9115
9116 tcg_op1 = read_fp_hreg(s, rn);
9117 tcg_op2 = read_fp_hreg(s, rm);
9118 tcg_res = tcg_temp_new_i32();
9119
9120 switch (fpopcode) {
9121 case 0x03: /* FMULX */
9122 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9123 break;
9124 case 0x04: /* FCMEQ (reg) */
9125 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9126 break;
9127 case 0x07: /* FRECPS */
9128 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9129 break;
9130 case 0x0f: /* FRSQRTS */
9131 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9132 break;
9133 case 0x14: /* FCMGE (reg) */
9134 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9135 break;
9136 case 0x15: /* FACGE */
9137 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9138 break;
9139 case 0x1a: /* FABD */
9140 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9141 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9142 break;
9143 case 0x1c: /* FCMGT (reg) */
9144 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9145 break;
9146 case 0x1d: /* FACGT */
9147 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9148 break;
9149 default:
9150 g_assert_not_reached();
9151 }
9152
9153 write_fp_sreg(s, rd, tcg_res);
9154
9156 tcg_temp_free_i32(tcg_res);
9157 tcg_temp_free_i32(tcg_op1);
9158 tcg_temp_free_i32(tcg_op2);
9159 tcg_temp_free_ptr(fpst);
9160 }
9161
9162 /* AdvSIMD scalar three same extra
9163 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
9164 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9165 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
9166 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9167 */
9168 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9169 uint32_t insn)
9170 {
9171 int rd = extract32(insn, 0, 5);
9172 int rn = extract32(insn, 5, 5);
9173 int opcode = extract32(insn, 11, 4);
9174 int rm = extract32(insn, 16, 5);
9175 int size = extract32(insn, 22, 2);
9176 bool u = extract32(insn, 29, 1);
9177 TCGv_i32 ele1, ele2, ele3;
9178 TCGv_i64 res;
9179 bool feature;
9180
9181 switch (u * 16 + opcode) {
9182 case 0x10: /* SQRDMLAH (vector) */
9183 case 0x11: /* SQRDMLSH (vector) */
9184 if (size != 1 && size != 2) {
9185 unallocated_encoding(s);
9186 return;
9187 }
9188 feature = dc_isar_feature(aa64_rdm, s);
9189 break;
9190 default:
9191 unallocated_encoding(s);
9192 return;
9193 }
9194 if (!feature) {
9195 unallocated_encoding(s);
9196 return;
9197 }
9198 if (!fp_access_check(s)) {
9199 return;
9200 }
9201
9202 /* Do a single operation on the lowest element in the vector.
9203 * We use the standard Neon helpers and rely on 0 OP 0 == 0
9204 * with no side effects for all these operations.
9205 * OPTME: special-purpose helpers would avoid doing some
9206 * unnecessary work in the helper for the 16 bit cases.
9207 */
9208 ele1 = tcg_temp_new_i32();
9209 ele2 = tcg_temp_new_i32();
9210 ele3 = tcg_temp_new_i32();
9211
9212 read_vec_element_i32(s, ele1, rn, 0, size);
9213 read_vec_element_i32(s, ele2, rm, 0, size);
9214 read_vec_element_i32(s, ele3, rd, 0, size);
9215
9216 switch (opcode) {
9217 case 0x0: /* SQRDMLAH */
9218 if (size == 1) {
9219 gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9220 } else {
9221 gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9222 }
9223 break;
9224 case 0x1: /* SQRDMLSH */
9225 if (size == 1) {
9226 gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9227 } else {
9228 gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9229 }
9230 break;
9231 default:
9232 g_assert_not_reached();
9233 }
9234 tcg_temp_free_i32(ele1);
9235 tcg_temp_free_i32(ele2);
9236
9237 res = tcg_temp_new_i64();
9238 tcg_gen_extu_i32_i64(res, ele3);
9239 tcg_temp_free_i32(ele3);
9240
9241 write_fp_dreg(s, rd, res);
9242 tcg_temp_free_i64(res);
9243 }
9244
9245 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9246 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9247 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9248 {
9249 /* Handle 64->64 opcodes which are shared between the scalar and
9250 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9251 * is valid in either group and also the double-precision fp ops.
9252 * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9253 * requires them.
9254 */
9255 TCGCond cond;
9256
9257 switch (opcode) {
9258 case 0x4: /* CLS, CLZ */
9259 if (u) {
9260 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9261 } else {
9262 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9263 }
9264 break;
9265 case 0x5: /* NOT */
9266 /* This opcode is shared with CNT and RBIT but we have earlier
9267 * enforced that size == 3 if and only if this is the NOT insn.
9268 */
9269 tcg_gen_not_i64(tcg_rd, tcg_rn);
9270 break;
9271 case 0x7: /* SQABS, SQNEG */
9272 if (u) {
9273 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9274 } else {
9275 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9276 }
9277 break;
9278 case 0xa: /* CMLT */
9279 /* 64 bit integer comparison against zero, result is
9280 * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9281 * and then negating, matching handle_3same_64() above.
9282 */
9283 cond = TCG_COND_LT;
9284 do_cmop:
9285 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9286 tcg_gen_neg_i64(tcg_rd, tcg_rd);
9287 break;
9288 case 0x8: /* CMGT, CMGE */
9289 cond = u ? TCG_COND_GE : TCG_COND_GT;
9290 goto do_cmop;
9291 case 0x9: /* CMEQ, CMLE */
9292 cond = u ? TCG_COND_LE : TCG_COND_EQ;
9293 goto do_cmop;
9294 case 0xb: /* ABS, NEG */
9295 if (u) {
9296 tcg_gen_neg_i64(tcg_rd, tcg_rn);
9297 } else {
9298 tcg_gen_abs_i64(tcg_rd, tcg_rn);
9299 }
9300 break;
9301 case 0x2f: /* FABS */
9302 gen_helper_vfp_absd(tcg_rd, tcg_rn);
9303 break;
9304 case 0x6f: /* FNEG */
9305 gen_helper_vfp_negd(tcg_rd, tcg_rn);
9306 break;
9307 case 0x7f: /* FSQRT */
9308 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9309 break;
9310 case 0x1a: /* FCVTNS */
9311 case 0x1b: /* FCVTMS */
9312 case 0x1c: /* FCVTAS */
9313 case 0x3a: /* FCVTPS */
9314 case 0x3b: /* FCVTZS */
9315 {
9316 TCGv_i32 tcg_shift = tcg_const_i32(0);
9317 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9318 tcg_temp_free_i32(tcg_shift);
9319 break;
9320 }
9321 case 0x5a: /* FCVTNU */
9322 case 0x5b: /* FCVTMU */
9323 case 0x5c: /* FCVTAU */
9324 case 0x7a: /* FCVTPU */
9325 case 0x7b: /* FCVTZU */
9326 {
9327 TCGv_i32 tcg_shift = tcg_const_i32(0);
9328 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9329 tcg_temp_free_i32(tcg_shift);
9330 break;
9331 }
9332 case 0x18: /* FRINTN */
9333 case 0x19: /* FRINTM */
9334 case 0x38: /* FRINTP */
9335 case 0x39: /* FRINTZ */
9336 case 0x58: /* FRINTA */
9337 case 0x79: /* FRINTI */
9338 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9339 break;
9340 case 0x59: /* FRINTX */
9341 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9342 break;
9343 case 0x1e: /* FRINT32Z */
9344 case 0x5e: /* FRINT32X */
9345 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9346 break;
9347 case 0x1f: /* FRINT64Z */
9348 case 0x5f: /* FRINT64X */
9349 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9350 break;
9351 default:
9352 g_assert_not_reached();
9353 }
9354 }
9355
9356 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9357 bool is_scalar, bool is_u, bool is_q,
9358 int size, int rn, int rd)
9359 {
9360 bool is_double = (size == MO_64);
9361 TCGv_ptr fpst;
9362
9363 if (!fp_access_check(s)) {
9364 return;
9365 }
9366
9367 fpst = get_fpstatus_ptr(size == MO_16);
9368
9369 if (is_double) {
9370 TCGv_i64 tcg_op = tcg_temp_new_i64();
9371 TCGv_i64 tcg_zero = tcg_const_i64(0);
9372 TCGv_i64 tcg_res = tcg_temp_new_i64();
9373 NeonGenTwoDoubleOPFn *genfn;
9374 bool swap = false;
9375 int pass;
9376
9377 switch (opcode) {
9378 case 0x2e: /* FCMLT (zero) */
9379 swap = true;
9380 /* fallthrough */
9381 case 0x2c: /* FCMGT (zero) */
9382 genfn = gen_helper_neon_cgt_f64;
9383 break;
9384 case 0x2d: /* FCMEQ (zero) */
9385 genfn = gen_helper_neon_ceq_f64;
9386 break;
9387 case 0x6d: /* FCMLE (zero) */
9388 swap = true;
9389 /* fall through */
9390 case 0x6c: /* FCMGE (zero) */
9391 genfn = gen_helper_neon_cge_f64;
9392 break;
9393 default:
9394 g_assert_not_reached();
9395 }
9396
9397 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9398 read_vec_element(s, tcg_op, rn, pass, MO_64);
9399 if (swap) {
9400 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9401 } else {
9402 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9403 }
9404 write_vec_element(s, tcg_res, rd, pass, MO_64);
9405 }
9406 tcg_temp_free_i64(tcg_res);
9407 tcg_temp_free_i64(tcg_zero);
9408 tcg_temp_free_i64(tcg_op);
9409
9410 clear_vec_high(s, !is_scalar, rd);
9411 } else {
9412 TCGv_i32 tcg_op = tcg_temp_new_i32();
9413 TCGv_i32 tcg_zero = tcg_const_i32(0);
9414 TCGv_i32 tcg_res = tcg_temp_new_i32();
9415 NeonGenTwoSingleOPFn *genfn;
9416 bool swap = false;
9417 int pass, maxpasses;
9418
9419 if (size == MO_16) {
9420 switch (opcode) {
9421 case 0x2e: /* FCMLT (zero) */
9422 swap = true;
9423 /* fall through */
9424 case 0x2c: /* FCMGT (zero) */
9425 genfn = gen_helper_advsimd_cgt_f16;
9426 break;
9427 case 0x2d: /* FCMEQ (zero) */
9428 genfn = gen_helper_advsimd_ceq_f16;
9429 break;
9430 case 0x6d: /* FCMLE (zero) */
9431 swap = true;
9432 /* fall through */
9433 case 0x6c: /* FCMGE (zero) */
9434 genfn = gen_helper_advsimd_cge_f16;
9435 break;
9436 default:
9437 g_assert_not_reached();
9438 }
9439 } else {
9440 switch (opcode) {
9441 case 0x2e: /* FCMLT (zero) */
9442 swap = true;
9443 /* fall through */
9444 case 0x2c: /* FCMGT (zero) */
9445 genfn = gen_helper_neon_cgt_f32;
9446 break;
9447 case 0x2d: /* FCMEQ (zero) */
9448 genfn = gen_helper_neon_ceq_f32;
9449 break;
9450 case 0x6d: /* FCMLE (zero) */
9451 swap = true;
9452 /* fall through */
9453 case 0x6c: /* FCMGE (zero) */
9454 genfn = gen_helper_neon_cge_f32;
9455 break;
9456 default:
9457 g_assert_not_reached();
9458 }
9459 }
9460
9461 if (is_scalar) {
9462 maxpasses = 1;
9463 } else {
9464 int vector_size = 8 << is_q;
9465 maxpasses = vector_size >> size;
9466 }
9467
9468 for (pass = 0; pass < maxpasses; pass++) {
9469 read_vec_element_i32(s, tcg_op, rn, pass, size);
9470 if (swap) {
9471 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9472 } else {
9473 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9474 }
9475 if (is_scalar) {
9476 write_fp_sreg(s, rd, tcg_res);
9477 } else {
9478 write_vec_element_i32(s, tcg_res, rd, pass, size);
9479 }
9480 }
9481 tcg_temp_free_i32(tcg_res);
9482 tcg_temp_free_i32(tcg_zero);
9483 tcg_temp_free_i32(tcg_op);
9484 if (!is_scalar) {
9485 clear_vec_high(s, is_q, rd);
9486 }
9487 }
9488
9489 tcg_temp_free_ptr(fpst);
9490 }
9491
9492 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9493 bool is_scalar, bool is_u, bool is_q,
9494 int size, int rn, int rd)
9495 {
9496 bool is_double = (size == 3);
9497 TCGv_ptr fpst = get_fpstatus_ptr(false);
9498
9499 if (is_double) {
9500 TCGv_i64 tcg_op = tcg_temp_new_i64();
9501 TCGv_i64 tcg_res = tcg_temp_new_i64();
9502 int pass;
9503
9504 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9505 read_vec_element(s, tcg_op, rn, pass, MO_64);
9506 switch (opcode) {
9507 case 0x3d: /* FRECPE */
9508 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9509 break;
9510 case 0x3f: /* FRECPX */
9511 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9512 break;
9513 case 0x7d: /* FRSQRTE */
9514 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9515 break;
9516 default:
9517 g_assert_not_reached();
9518 }
9519 write_vec_element(s, tcg_res, rd, pass, MO_64);
9520 }
9521 tcg_temp_free_i64(tcg_res);
9522 tcg_temp_free_i64(tcg_op);
9523 clear_vec_high(s, !is_scalar, rd);
9524 } else {
9525 TCGv_i32 tcg_op = tcg_temp_new_i32();
9526 TCGv_i32 tcg_res = tcg_temp_new_i32();
9527 int pass, maxpasses;
9528
9529 if (is_scalar) {
9530 maxpasses = 1;
9531 } else {
9532 maxpasses = is_q ? 4 : 2;
9533 }
9534
9535 for (pass = 0; pass < maxpasses; pass++) {
9536 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9537
9538 switch (opcode) {
9539 case 0x3c: /* URECPE */
9540 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
9541 break;
9542 case 0x3d: /* FRECPE */
9543 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9544 break;
9545 case 0x3f: /* FRECPX */
9546 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9547 break;
9548 case 0x7d: /* FRSQRTE */
9549 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9550 break;
9551 default:
9552 g_assert_not_reached();
9553 }
9554
9555 if (is_scalar) {
9556 write_fp_sreg(s, rd, tcg_res);
9557 } else {
9558 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9559 }
9560 }
9561 tcg_temp_free_i32(tcg_res);
9562 tcg_temp_free_i32(tcg_op);
9563 if (!is_scalar) {
9564 clear_vec_high(s, is_q, rd);
9565 }
9566 }
9567 tcg_temp_free_ptr(fpst);
9568 }
9569
9570 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9571 int opcode, bool u, bool is_q,
9572 int size, int rn, int rd)
9573 {
9574 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9575 * in the source becomes a size element in the destination).
9576 */
9577 int pass;
9578 TCGv_i32 tcg_res[2];
9579 int destelt = is_q ? 2 : 0;
9580 int passes = scalar ? 1 : 2;
9581
9582 if (scalar) {
9583 tcg_res[1] = tcg_const_i32(0);
9584 }
9585
9586 for (pass = 0; pass < passes; pass++) {
9587 TCGv_i64 tcg_op = tcg_temp_new_i64();
9588 NeonGenNarrowFn *genfn = NULL;
9589 NeonGenNarrowEnvFn *genenvfn = NULL;
9590
9591 if (scalar) {
9592 read_vec_element(s, tcg_op, rn, pass, size + 1);
9593 } else {
9594 read_vec_element(s, tcg_op, rn, pass, MO_64);
9595 }
9596 tcg_res[pass] = tcg_temp_new_i32();
9597
9598 switch (opcode) {
9599 case 0x12: /* XTN, SQXTUN */
9600 {
9601 static NeonGenNarrowFn * const xtnfns[3] = {
9602 gen_helper_neon_narrow_u8,
9603 gen_helper_neon_narrow_u16,
9604 tcg_gen_extrl_i64_i32,
9605 };
9606 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9607 gen_helper_neon_unarrow_sat8,
9608 gen_helper_neon_unarrow_sat16,
9609 gen_helper_neon_unarrow_sat32,
9610 };
9611 if (u) {
9612 genenvfn = sqxtunfns[size];
9613 } else {
9614 genfn = xtnfns[size];
9615 }
9616 break;
9617 }
9618 case 0x14: /* SQXTN, UQXTN */
9619 {
9620 static NeonGenNarrowEnvFn * const fns[3][2] = {
9621 { gen_helper_neon_narrow_sat_s8,
9622 gen_helper_neon_narrow_sat_u8 },
9623 { gen_helper_neon_narrow_sat_s16,
9624 gen_helper_neon_narrow_sat_u16 },
9625 { gen_helper_neon_narrow_sat_s32,
9626 gen_helper_neon_narrow_sat_u32 },
9627 };
9628 genenvfn = fns[size][u];
9629 break;
9630 }
9631 case 0x16: /* FCVTN, FCVTN2 */
9632 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9633 if (size == 2) {
9634 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9635 } else {
9636 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9637 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9638 TCGv_ptr fpst = get_fpstatus_ptr(false);
9639 TCGv_i32 ahp = get_ahp_flag();
9640
9641 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9642 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9643 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9644 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9645 tcg_temp_free_i32(tcg_lo);
9646 tcg_temp_free_i32(tcg_hi);
9647 tcg_temp_free_ptr(fpst);
9648 tcg_temp_free_i32(ahp);
9649 }
9650 break;
9651 case 0x56: /* FCVTXN, FCVTXN2 */
9652 /* 64 bit to 32 bit float conversion
9653 * with von Neumann rounding (round to odd)
9654 */
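/* Round-to-odd forces the result's low bit to 1 whenever any
 * discarded bits were non-zero, so a later narrowing of this
 * result cannot introduce a double-rounding error.
 */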
9655 assert(size == 2);
9656 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9657 break;
9658 default:
9659 g_assert_not_reached();
9660 }
9661
9662 if (genfn) {
9663 genfn(tcg_res[pass], tcg_op);
9664 } else if (genenvfn) {
9665 genenvfn(tcg_res[pass], cpu_env, tcg_op);
9666 }
9667
9668 tcg_temp_free_i64(tcg_op);
9669 }
9670
9671 for (pass = 0; pass < 2; pass++) {
9672 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9673 tcg_temp_free_i32(tcg_res[pass]);
9674 }
9675 clear_vec_high(s, is_q, rd);
9676 }
9677
9678 /* Remaining saturating accumulating ops */
9679 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9680 bool is_q, int size, int rn, int rd)
9681 {
9682 bool is_double = (size == 3);
9683
9684 if (is_double) {
9685 TCGv_i64 tcg_rn = tcg_temp_new_i64();
9686 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9687 int pass;
9688
9689 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9690 read_vec_element(s, tcg_rn, rn, pass, MO_64);
9691 read_vec_element(s, tcg_rd, rd, pass, MO_64);
9692
9693 if (is_u) { /* USQADD */
9694 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9695 } else { /* SUQADD */
9696 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9697 }
9698 write_vec_element(s, tcg_rd, rd, pass, MO_64);
9699 }
9700 tcg_temp_free_i64(tcg_rd);
9701 tcg_temp_free_i64(tcg_rn);
9702 clear_vec_high(s, !is_scalar, rd);
9703 } else {
9704 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9705 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9706 int pass, maxpasses;
9707
9708 if (is_scalar) {
9709 maxpasses = 1;
9710 } else {
9711 maxpasses = is_q ? 4 : 2;
9712 }
9713
9714 for (pass = 0; pass < maxpasses; pass++) {
9715 if (is_scalar) {
9716 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9717 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9718 } else {
9719 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9720 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9721 }
9722
9723 if (is_u) { /* USQADD */
9724 switch (size) {
9725 case 0:
9726 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9727 break;
9728 case 1:
9729 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9730 break;
9731 case 2:
9732 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9733 break;
9734 default:
9735 g_assert_not_reached();
9736 }
9737 } else { /* SUQADD */
9738 switch (size) {
9739 case 0:
9740 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9741 break;
9742 case 1:
9743 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9744 break;
9745 case 2:
9746 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9747 break;
9748 default:
9749 g_assert_not_reached();
9750 }
9751 }
9752
9753 if (is_scalar) {
9754 TCGv_i64 tcg_zero = tcg_const_i64(0);
9755 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9756 tcg_temp_free_i64(tcg_zero);
9757 }
9758 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9759 }
9760 tcg_temp_free_i32(tcg_rd);
9761 tcg_temp_free_i32(tcg_rn);
9762 clear_vec_high(s, is_q, rd);
9763 }
9764 }
9765
9766 /* AdvSIMD scalar two reg misc
9767 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9768 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9769 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9770 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9771 */
9772 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9773 {
9774 int rd = extract32(insn, 0, 5);
9775 int rn = extract32(insn, 5, 5);
9776 int opcode = extract32(insn, 12, 5);
9777 int size = extract32(insn, 22, 2);
9778 bool u = extract32(insn, 29, 1);
9779 bool is_fcvt = false;
9780 int rmode;
9781 TCGv_i32 tcg_rmode;
9782 TCGv_ptr tcg_fpstatus;
9783
9784 switch (opcode) {
9785 case 0x3: /* USQADD / SUQADD */
9786 if (!fp_access_check(s)) {
9787 return;
9788 }
9789 handle_2misc_satacc(s, true, u, false, size, rn, rd);
9790 return;
9791 case 0x7: /* SQABS / SQNEG */
9792 break;
9793 case 0xa: /* CMLT */
9794 if (u) {
9795 unallocated_encoding(s);
9796 return;
9797 }
9798 /* fall through */
9799 case 0x8: /* CMGT, CMGE */
9800 case 0x9: /* CMEQ, CMLE */
9801 case 0xb: /* ABS, NEG */
9802 if (size != 3) {
9803 unallocated_encoding(s);
9804 return;
9805 }
9806 break;
9807 case 0x12: /* SQXTUN */
9808 if (!u) {
9809 unallocated_encoding(s);
9810 return;
9811 }
9812 /* fall through */
9813 case 0x14: /* SQXTN, UQXTN */
9814 if (size == 3) {
9815 unallocated_encoding(s);
9816 return;
9817 }
9818 if (!fp_access_check(s)) {
9819 return;
9820 }
9821 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9822 return;
9823 case 0xc ... 0xf:
9824 case 0x16 ... 0x1d:
9825 case 0x1f:
9826 /* Floating point: U, size[1] and opcode indicate operation;
9827 * size[0] indicates single or double precision.
9828 */
9829 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9830 size = extract32(size, 0, 1) ? 3 : 2;
9831 switch (opcode) {
9832 case 0x2c: /* FCMGT (zero) */
9833 case 0x2d: /* FCMEQ (zero) */
9834 case 0x2e: /* FCMLT (zero) */
9835 case 0x6c: /* FCMGE (zero) */
9836 case 0x6d: /* FCMLE (zero) */
9837 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9838 return;
9839 case 0x1d: /* SCVTF */
9840 case 0x5d: /* UCVTF */
9841 {
9842 bool is_signed = (opcode == 0x1d);
9843 if (!fp_access_check(s)) {
9844 return;
9845 }
9846 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9847 return;
9848 }
9849 case 0x3d: /* FRECPE */
9850 case 0x3f: /* FRECPX */
9851 case 0x7d: /* FRSQRTE */
9852 if (!fp_access_check(s)) {
9853 return;
9854 }
9855 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9856 return;
9857 case 0x1a: /* FCVTNS */
9858 case 0x1b: /* FCVTMS */
9859 case 0x3a: /* FCVTPS */
9860 case 0x3b: /* FCVTZS */
9861 case 0x5a: /* FCVTNU */
9862 case 0x5b: /* FCVTMU */
9863 case 0x7a: /* FCVTPU */
9864 case 0x7b: /* FCVTZU */
9865 is_fcvt = true;
9866 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9867 break;
9868 case 0x1c: /* FCVTAS */
9869 case 0x5c: /* FCVTAU */
9870 /* FCVTA* use TIEAWAY, which doesn't fit in the usual rounding mode encoding */
9871 is_fcvt = true;
9872 rmode = FPROUNDING_TIEAWAY;
9873 break;
9874 case 0x56: /* FCVTXN, FCVTXN2 */
9875 if (size == 2) {
9876 unallocated_encoding(s);
9877 return;
9878 }
9879 if (!fp_access_check(s)) {
9880 return;
9881 }
9882 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9883 return;
9884 default:
9885 unallocated_encoding(s);
9886 return;
9887 }
9888 break;
9889 default:
9890 unallocated_encoding(s);
9891 return;
9892 }
9893
9894 if (!fp_access_check(s)) {
9895 return;
9896 }
9897
9898 if (is_fcvt) {
9899 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
9900 tcg_fpstatus = get_fpstatus_ptr(false);
9901 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9902 } else {
9903 tcg_rmode = NULL;
9904 tcg_fpstatus = NULL;
9905 }
9906
9907 if (size == 3) {
9908 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9909 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9910
9911 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9912 write_fp_dreg(s, rd, tcg_rd);
9913 tcg_temp_free_i64(tcg_rd);
9914 tcg_temp_free_i64(tcg_rn);
9915 } else {
9916 TCGv_i32 tcg_rn = tcg_temp_new_i32();
9917 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9918
9919 read_vec_element_i32(s, tcg_rn, rn, 0, size);
9920
9921 switch (opcode) {
9922 case 0x7: /* SQABS, SQNEG */
9923 {
9924 NeonGenOneOpEnvFn *genfn;
9925 static NeonGenOneOpEnvFn * const fns[3][2] = {
9926 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9927 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9928 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9929 };
9930 genfn = fns[size][u];
9931 genfn(tcg_rd, cpu_env, tcg_rn);
9932 break;
9933 }
9934 case 0x1a: /* FCVTNS */
9935 case 0x1b: /* FCVTMS */
9936 case 0x1c: /* FCVTAS */
9937 case 0x3a: /* FCVTPS */
9938 case 0x3b: /* FCVTZS */
9939 {
9940 TCGv_i32 tcg_shift = tcg_const_i32(0);
9941 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9942 tcg_temp_free_i32(tcg_shift);
9943 break;
9944 }
9945 case 0x5a: /* FCVTNU */
9946 case 0x5b: /* FCVTMU */
9947 case 0x5c: /* FCVTAU */
9948 case 0x7a: /* FCVTPU */
9949 case 0x7b: /* FCVTZU */
9950 {
9951 TCGv_i32 tcg_shift = tcg_const_i32(0);
9952 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9953 tcg_temp_free_i32(tcg_shift);
9954 break;
9955 }
9956 default:
9957 g_assert_not_reached();
9958 }
9959
9960 write_fp_sreg(s, rd, tcg_rd);
9961 tcg_temp_free_i32(tcg_rd);
9962 tcg_temp_free_i32(tcg_rn);
9963 }
9964
9965 if (is_fcvt) {
9966 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9967 tcg_temp_free_i32(tcg_rmode);
9968 tcg_temp_free_ptr(tcg_fpstatus);
9969 }
9970 }
9971
9972 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9973 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9974 int immh, int immb, int opcode, int rn, int rd)
9975 {
9976 int size = 32 - clz32(immh) - 1;
9977 int immhb = immh << 3 | immb;
9978 int shift = 2 * (8 << size) - immhb;
9979 bool accumulate = false;
9980 int dsize = is_q ? 128 : 64;
9981 int esize = 8 << size;
9982 int elements = dsize/esize;
9983 MemOp memop = size | (is_u ? 0 : MO_SIGN);
9984 TCGv_i64 tcg_rn = new_tmp_a64(s);
9985 TCGv_i64 tcg_rd = new_tmp_a64(s);
9986 TCGv_i64 tcg_round;
9987 uint64_t round_const;
9988 int i;
9989
9990 if (extract32(immh, 3, 1) && !is_q) {
9991 unallocated_encoding(s);
9992 return;
9993 }
9994 tcg_debug_assert(size <= 3);
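/* Worked example (illustrative): SSHR V0.8B, V1.8B, #3 has
 * immh:immb = 0b0001:101, so size = 0, esize = 8 and immhb = 13,
 * giving shift = 2 * 8 - 13 = 3; larger immhb values encode
 * smaller right-shift counts.
 */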
9995
9996 if (!fp_access_check(s)) {
9997 return;
9998 }
9999
10000 switch (opcode) {
10001 case 0x02: /* SSRA / USRA (accumulate) */
10002 if (is_u) {
10003 /* A shift count equal to the element size produces zero to add. */
10004 if (shift == 8 << size) {
10005 goto done;
10006 }
10007 gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
10008 } else {
10009 /* A shift count equal to the element size produces all sign bits to add. */
10010 if (shift == 8 << size) {
10011 shift -= 1;
10012 }
10013 gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
10014 }
10015 return;
10016 case 0x08: /* SRI */
10017 /* A shift count equal to the element size is valid but does nothing. */
10018 if (shift == 8 << size) {
10019 goto done;
10020 }
10021 gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
10022 return;
10023
10024 case 0x00: /* SSHR / USHR */
10025 if (is_u) {
10026 if (shift == 8 << size) {
10027 /* A shift count equal to the element size produces zero. */
10028 tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
10029 is_q ? 16 : 8, vec_full_reg_size(s), 0);
10030 } else {
10031 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
10032 }
10033 } else {
10034 /* A shift count equal to the element size produces all sign bits. */
10035 if (shift == 8 << size) {
10036 shift -= 1;
10037 }
10038 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
10039 }
10040 return;
10041
10042 case 0x04: /* SRSHR / URSHR (rounding) */
10043 break;
10044 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10045 accumulate = true;
10046 break;
10047 default:
10048 g_assert_not_reached();
10049 }
10050
10051 round_const = 1ULL << (shift - 1);
10052 tcg_round = tcg_const_i64(round_const);
10053
10054 for (i = 0; i < elements; i++) {
10055 read_vec_element(s, tcg_rn, rn, i, memop);
10056 if (accumulate) {
10057 read_vec_element(s, tcg_rd, rd, i, memop);
10058 }
10059
10060 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10061 accumulate, is_u, size, shift);
10062
10063 write_vec_element(s, tcg_rd, rd, i, size);
10064 }
10065 tcg_temp_free_i64(tcg_round);
10066
10067 done:
10068 clear_vec_high(s, is_q, rd);
10069 }
10070
10071 /* SHL/SLI - Vector shift left */
10072 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10073 int immh, int immb, int opcode, int rn, int rd)
10074 {
10075 int size = 32 - clz32(immh) - 1;
10076 int immhb = immh << 3 | immb;
10077 int shift = immhb - (8 << size);
10078
10079 /* Range of size is limited by decode: immh is a non-zero 4-bit field */
10080 assert(size >= 0 && size <= 3);
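/* Worked example (illustrative): SHL V0.4H, V1.4H, #5 has
 * immh = 0b0010 (so size = 1, esize = 16) and immhb = 21,
 * giving shift = 21 - 16 = 5; left shifts encode the count
 * directly as immhb - esize.
 */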
10081
10082 if (extract32(immh, 3, 1) && !is_q) {
10083 unallocated_encoding(s);
10084 return;
10085 }
10086
10087 if (!fp_access_check(s)) {
10088 return;
10089 }
10090
10091 if (insert) {
10092 gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
10093 } else {
10094 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10095 }
10096 }
10097
10098 /* USHLL/SHLL - Vector shift left with widening */
10099 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10100 int immh, int immb, int opcode, int rn, int rd)
10101 {
10102 int size = 32 - clz32(immh) - 1;
10103 int immhb = immh << 3 | immb;
10104 int shift = immhb - (8 << size);
10105 int dsize = 64;
10106 int esize = 8 << size;
10107 int elements = dsize/esize;
10108 TCGv_i64 tcg_rn = new_tmp_a64(s);
10109 TCGv_i64 tcg_rd = new_tmp_a64(s);
10110 int i;
10111
10112 if (size >= 3) {
10113 unallocated_encoding(s);
10114 return;
10115 }
10116
10117 if (!fp_access_check(s)) {
10118 return;
10119 }
10120
10121 /* For the LL variants the store is larger than the load, so if
10122 * rd == rn we would overwrite parts of our input before consuming
10123 * them. Load everything up front and use shifts in the main loop.
10124 */
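/* For instance "USHLL V0.8H, V0.8B, #0" (the UXTL alias) has rd == rn;
 * reading the whole 64-bit source into tcg_rn first keeps the widening
 * stores below from clobbering it.
 */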
10125 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10126
10127 for (i = 0; i < elements; i++) {
10128 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10129 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10130 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10131 write_vec_element(s, tcg_rd, rd, i, size + 1);
10132 }
10133 }
10134
10135 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10136 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10137 int immh, int immb, int opcode, int rn, int rd)
10138 {
10139 int immhb = immh << 3 | immb;
10140 int size = 32 - clz32(immh) - 1;
10141 int dsize = 64;
10142 int esize = 8 << size;
10143 int elements = dsize/esize;
10144 int shift = (2 * esize) - immhb;
10145 bool round = extract32(opcode, 0, 1);
10146 TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10147 TCGv_i64 tcg_round;
10148 int i;
10149
10150 if (extract32(immh, 3, 1)) {
10151 unallocated_encoding(s);
10152 return;
10153 }
10154
10155 if (!fp_access_check(s)) {
10156 return;
10157 }
10158
10159 tcg_rn = tcg_temp_new_i64();
10160 tcg_rd = tcg_temp_new_i64();
10161 tcg_final = tcg_temp_new_i64();
10162 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10163
10164 if (round) {
10165 uint64_t round_const = 1ULL << (shift - 1);
10166 tcg_round = tcg_const_i64(round_const);
10167 } else {
10168 tcg_round = NULL;
10169 }
10170
10171 for (i = 0; i < elements; i++) {
10172 read_vec_element(s, tcg_rn, rn, i, size+1);
10173 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10174 false, true, size+1, shift);
10175
10176 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10177 }
10178
10179 if (!is_q) {
10180 write_vec_element(s, tcg_final, rd, 0, MO_64);
10181 } else {
10182 write_vec_element(s, tcg_final, rd, 1, MO_64);
10183 }
10184 if (round) {
10185 tcg_temp_free_i64(tcg_round);
10186 }
10187 tcg_temp_free_i64(tcg_rn);
10188 tcg_temp_free_i64(tcg_rd);
10189 tcg_temp_free_i64(tcg_final);
10190
10191 clear_vec_high(s, is_q, rd);
10192 }
10193
10195 /* AdvSIMD shift by immediate
10196 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
10197 * +---+---+---+-------------+------+------+--------+---+------+------+
10198 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
10199 * +---+---+---+-------------+------+------+--------+---+------+------+
10200 */
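/* Note that encodings with immh == 0000 belong to the AdvSIMD modified
 * immediate class and are decoded elsewhere; immh is non-zero here, so
 * the clz32()-based size calculations in the handlers are well defined.
 */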
10201 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10202 {
10203 int rd = extract32(insn, 0, 5);
10204 int rn = extract32(insn, 5, 5);
10205 int opcode = extract32(insn, 11, 5);
10206 int immb = extract32(insn, 16, 3);
10207 int immh = extract32(insn, 19, 4);
10208 bool is_u = extract32(insn, 29, 1);
10209 bool is_q = extract32(insn, 30, 1);
10210
10211 switch (opcode) {
10212 case 0x08: /* SRI */
10213 if (!is_u) {
10214 unallocated_encoding(s);
10215 return;
10216 }
10217 /* fall through */
10218 case 0x00: /* SSHR / USHR */
10219 case 0x02: /* SSRA / USRA (accumulate) */
10220 case 0x04: /* SRSHR / URSHR (rounding) */
10221 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10222 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10223 break;
10224 case 0x0a: /* SHL / SLI */
10225 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10226 break;
10227 case 0x10: /* SHRN */
10228 case 0x11: /* RSHRN / SQRSHRUN */
10229 if (is_u) {
10230 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10231 opcode, rn, rd);
10232 } else {
10233 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10234 }
10235 break;
10236 case 0x12: /* SQSHRN / UQSHRN */
10237 case 0x13: /* SQRSHRN / UQRSHRN */
10238 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10239 opcode, rn, rd);
10240 break;
10241 case 0x14: /* SSHLL / USHLL */
10242 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10243 break;
10244 case 0x1c: /* SCVTF / UCVTF */
10245 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10246 opcode, rn, rd);
10247 break;
10248 case 0xc: /* SQSHLU */
10249 if (!is_u) {
10250 unallocated_encoding(s);
10251 return;
10252 }
10253 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10254 break;
10255 case 0xe: /* SQSHL, UQSHL */
10256 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10257 break;
10258 case 0x1f: /* FCVTZS/ FCVTZU */
10259 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10260 return;
10261 default:
10262 unallocated_encoding(s);
10263 return;
10264 }
10265 }
10266
10267 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10268 * TCGv_i64 on vector lanes twice the width specified by size.
10269 */
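/* For example (illustrative): with size == 0 each TCGv_i64 holds four
 * 16-bit lanes (bytes already widened to halfwords) and
 * gen_helper_neon_addl_u16 adds the lanes independently, while with
 * size == 2 there is a single 64-bit lane and a plain tcg_gen_add_i64
 * suffices.
 */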
10270 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10271 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10272 {
10273 static NeonGenTwo64OpFn * const fns[3][2] = {
10274 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10275 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10276 { tcg_gen_add_i64, tcg_gen_sub_i64 },
10277 };
10278 NeonGenTwo64OpFn *genfn;
10279 assert(size < 3);
10280
10281 genfn = fns[size][is_sub];
10282 genfn(tcg_res, tcg_op1, tcg_op2);
10283 }
10284
10285 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10286 int opcode, int rd, int rn, int rm)
10287 {
10288 /* 3-reg-different widening insns: 64 x 64 -> 128 */
10289 TCGv_i64 tcg_res[2];
10290 int pass, accop;
10291
10292 tcg_res[0] = tcg_temp_new_i64();
10293 tcg_res[1] = tcg_temp_new_i64();
10294
10295 /* Does this op do an adding accumulate, a subtracting accumulate,
10296 * or no accumulate at all?
10297 */
10298 switch (opcode) {
10299 case 5:
10300 case 8:
10301 case 9:
10302 accop = 1;
10303 break;
10304 case 10:
10305 case 11:
10306 accop = -1;
10307 break;
10308 default:
10309 accop = 0;
10310 break;
10311 }
10312
10313 if (accop != 0) {
10314 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10315 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10316 }
10317
10318 /* size == 2 means two 32x32->64 operations; this is worth special
10319 * casing because we can generally handle it inline.
10320 */
10321 if (size == 2) {
10322 for (pass = 0; pass < 2; pass++) {
10323 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10324 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10325 TCGv_i64 tcg_passres;
10326 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10327
10328 int elt = pass + is_q * 2;
10329
10330 read_vec_element(s, tcg_op1, rn, elt, memop);
10331 read_vec_element(s, tcg_op2, rm, elt, memop);
10332
10333 if (accop == 0) {
10334 tcg_passres = tcg_res[pass];
10335 } else {
10336 tcg_passres = tcg_temp_new_i64();
10337 }
10338
10339 switch (opcode) {
10340 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10341 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10342 break;
10343 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10344 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10345 break;
10346 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10347 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10348 {
10349 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10350 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10351
10352 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10353 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10354 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10355 tcg_passres,
10356 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10357 tcg_temp_free_i64(tcg_tmp1);
10358 tcg_temp_free_i64(tcg_tmp2);
10359 break;
10360 }
10361 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10362 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10363 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10364 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10365 break;
10366 case 9: /* SQDMLAL, SQDMLAL2 */
10367 case 11: /* SQDMLSL, SQDMLSL2 */
10368 case 13: /* SQDMULL, SQDMULL2 */
10369 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10370 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10371 tcg_passres, tcg_passres);
10372 break;
10373 default:
10374 g_assert_not_reached();
10375 }
10376
10377 if (opcode == 9 || opcode == 11) {
10378 /* saturating accumulate ops */
10379 if (accop < 0) {
10380 tcg_gen_neg_i64(tcg_passres, tcg_passres);
10381 }
10382 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10383 tcg_res[pass], tcg_passres);
10384 } else if (accop > 0) {
10385 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10386 } else if (accop < 0) {
10387 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10388 }
10389
10390 if (accop != 0) {
10391 tcg_temp_free_i64(tcg_passres);
10392 }
10393
10394 tcg_temp_free_i64(tcg_op1);
10395 tcg_temp_free_i64(tcg_op2);
10396 }
10397 } else {
10398 /* size 0 or 1, generally helper functions */
10399 for (pass = 0; pass < 2; pass++) {
10400 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10401 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10402 TCGv_i64 tcg_passres;
10403 int elt = pass + is_q * 2;
10404
10405 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10406 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10407
10408 if (accop == 0) {
10409 tcg_passres = tcg_res[pass];
10410 } else {
10411 tcg_passres = tcg_temp_new_i64();
10412 }
10413
10414 switch (opcode) {
10415 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10416 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10417 {
10418 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10419 static NeonGenWidenFn * const widenfns[2][2] = {
10420 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10421 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10422 };
10423 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10424
10425 widenfn(tcg_op2_64, tcg_op2);
10426 widenfn(tcg_passres, tcg_op1);
10427 gen_neon_addl(size, (opcode == 2), tcg_passres,
10428 tcg_passres, tcg_op2_64);
10429 tcg_temp_free_i64(tcg_op2_64);
10430 break;
10431 }
10432 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10433 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10434 if (size == 0) {
10435 if (is_u) {
10436 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10437 } else {
10438 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10439 }
10440 } else {
10441 if (is_u) {
10442 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10443 } else {
10444 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10445 }
10446 }
10447 break;
10448 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10449 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10450 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10451 if (size == 0) {
10452 if (is_u) {
10453 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10454 } else {
10455 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10456 }
10457 } else {
10458 if (is_u) {
10459 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10460 } else {
10461 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10462 }
10463 }
10464 break;
10465 case 9: /* SQDMLAL, SQDMLAL2 */
10466 case 11: /* SQDMLSL, SQDMLSL2 */
10467 case 13: /* SQDMULL, SQDMULL2 */
10468 assert(size == 1);
10469 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10470 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10471 tcg_passres, tcg_passres);
10472 break;
10473 case 14: /* PMULL */
10474 assert(size == 0);
10475 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
10476 break;
10477 default:
10478 g_assert_not_reached();
10479 }
10480 tcg_temp_free_i32(tcg_op1);
10481 tcg_temp_free_i32(tcg_op2);
10482
10483 if (accop != 0) {
10484 if (opcode == 9 || opcode == 11) {
10485 /* saturating accumulate ops */
10486 if (accop < 0) {
10487 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10488 }
10489 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10490 tcg_res[pass],
10491 tcg_passres);
10492 } else {
10493 gen_neon_addl(size, (accop < 0), tcg_res[pass],
10494 tcg_res[pass], tcg_passres);
10495 }
10496 tcg_temp_free_i64(tcg_passres);
10497 }
10498 }
10499 }
10500
10501 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10502 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10503 tcg_temp_free_i64(tcg_res[0]);
10504 tcg_temp_free_i64(tcg_res[1]);
10505 }
10506
10507 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10508 int opcode, int rd, int rn, int rm)
10509 {
10510 TCGv_i64 tcg_res[2];
10511 int part = is_q ? 2 : 0;
10512 int pass;
10513
10514 for (pass = 0; pass < 2; pass++) {
10515 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10516 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10517 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10518 static NeonGenWidenFn * const widenfns[3][2] = {
10519 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10520 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10521 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10522 };
10523 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10524
10525 read_vec_element(s, tcg_op1, rn, pass, MO_64);
10526 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10527 widenfn(tcg_op2_wide, tcg_op2);
10528 tcg_temp_free_i32(tcg_op2);
10529 tcg_res[pass] = tcg_temp_new_i64();
10530 gen_neon_addl(size, (opcode == 3),
10531 tcg_res[pass], tcg_op1, tcg_op2_wide);
10532 tcg_temp_free_i64(tcg_op1);
10533 tcg_temp_free_i64(tcg_op2_wide);
10534 }
10535
10536 for (pass = 0; pass < 2; pass++) {
10537 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10538 tcg_temp_free_i64(tcg_res[pass]);
10539 }
10540 }
10541
10542 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10543 {
10544 tcg_gen_addi_i64(in, in, 1U << 31);
10545 tcg_gen_extrh_i64_i32(res, in);
10546 }
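/* Adding 1 << 31 before taking the high half rounds to nearest instead
 * of truncating: e.g. an input of 0x00000000_80000000 becomes
 * 0x00000001_00000000, so the narrowed result is 1 where truncation
 * would give 0.
 */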
10547
10548 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10549 int opcode, int rd, int rn, int rm)
10550 {
10551 TCGv_i32 tcg_res[2];
10552 int part = is_q ? 2 : 0;
10553 int pass;
10554
10555 for (pass = 0; pass < 2; pass++) {
10556 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10557 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10558 TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10559 static NeonGenNarrowFn * const narrowfns[3][2] = {
10560 { gen_helper_neon_narrow_high_u8,
10561 gen_helper_neon_narrow_round_high_u8 },
10562 { gen_helper_neon_narrow_high_u16,
10563 gen_helper_neon_narrow_round_high_u16 },
10564 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10565 };
10566 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10567
10568 read_vec_element(s, tcg_op1, rn, pass, MO_64);
10569 read_vec_element(s, tcg_op2, rm, pass, MO_64);
10570
10571 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10572
10573 tcg_temp_free_i64(tcg_op1);
10574 tcg_temp_free_i64(tcg_op2);
10575
10576 tcg_res[pass] = tcg_temp_new_i32();
10577 gennarrow(tcg_res[pass], tcg_wideres);
10578 tcg_temp_free_i64(tcg_wideres);
10579 }
10580
10581 for (pass = 0; pass < 2; pass++) {
10582 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10583 tcg_temp_free_i32(tcg_res[pass]);
10584 }
10585 clear_vec_high(s, is_q, rd);
10586 }
10587
10588 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10589 {
10590 /* PMULL of 64 x 64 -> 128 is an odd special case because it
10591 * is the only three-reg-diff instruction which produces a
10592 * 128-bit wide result from a single operation. However, since
10593 * it's possible to calculate the two halves more or less
10594 * separately, we just use two helper calls.
10595 */
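/* Carry-less (polynomial) multiply, for reference: each set bit of one
 * operand contributes a shifted copy of the other, combined with XOR
 * rather than addition, e.g. 0b0101 * 0b0011 = 0b0101 ^ 0b1010 =
 * 0b1111, with no carries propagating between bit positions.
 */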
10596 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10597 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10598 TCGv_i64 tcg_res = tcg_temp_new_i64();
10599
10600 read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10601 read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10602 gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10603 write_vec_element(s, tcg_res, rd, 0, MO_64);
10604 gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10605 write_vec_element(s, tcg_res, rd, 1, MO_64);
10606
10607 tcg_temp_free_i64(tcg_op1);
10608 tcg_temp_free_i64(tcg_op2);
10609 tcg_temp_free_i64(tcg_res);
10610 }
10611
10612 /* AdvSIMD three different
10613 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
10614 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10615 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
10616 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10617 */
10618 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10619 {
10620 /* Instructions in this group fall into three basic classes
10621 * (in each case with the operation working on each element in
10622 * the input vectors):
10623 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10624 * 128 bit input)
10625 * (2) wide 64 x 128 -> 128
10626 * (3) narrowing 128 x 128 -> 64
10627 * Here we do initial decode, catch unallocated cases and
10628 * dispatch to separate functions for each class.
10629 */
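/* Concrete examples of the three classes (illustrative):
 * (1) widening: SADDL V0.8H, V1.8B, V2.8B
 * (2) wide: SADDW V0.8H, V1.8H, V2.8B
 * (3) narrowing: ADDHN V0.8B, V1.8H, V2.8H
 */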
10630 int is_q = extract32(insn, 30, 1);
10631 int is_u = extract32(insn, 29, 1);
10632 int size = extract32(insn, 22, 2);
10633 int opcode = extract32(insn, 12, 4);
10634 int rm = extract32(insn, 16, 5);
10635 int rn = extract32(insn, 5, 5);
10636 int rd = extract32(insn, 0, 5);
10637
10638 switch (opcode) {
10639 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10640 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10641 /* 64 x 128 -> 128 */
10642 if (size == 3) {
10643 unallocated_encoding(s);
10644 return;
10645 }
10646 if (!fp_access_check(s)) {
10647 return;
10648 }
10649 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10650 break;
10651 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10652 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10653 /* 128 x 128 -> 64 */
10654 if (size == 3) {
10655 unallocated_encoding(s);
10656 return;
10657 }
10658 if (!fp_access_check(s)) {
10659 return;
10660 }
10661 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10662 break;
10663 case 14: /* PMULL, PMULL2 */
10664 if (is_u || size == 1 || size == 2) {
10665 unallocated_encoding(s);
10666 return;
10667 }
10668 if (size == 3) {
10669 if (!dc_isar_feature(aa64_pmull, s)) {
10670 unallocated_encoding(s);
10671 return;
10672 }
10673 if (!fp_access_check(s)) {
10674 return;
10675 }
10676 handle_pmull_64(s, is_q, rd, rn, rm);
10677 return;
10678 }
10679 goto is_widening;
10680 case 9: /* SQDMLAL, SQDMLAL2 */
10681 case 11: /* SQDMLSL, SQDMLSL2 */
10682 case 13: /* SQDMULL, SQDMULL2 */
10683 if (is_u || size == 0) {
10684 unallocated_encoding(s);
10685 return;
10686 }
10687 /* fall through */
10688 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10689 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10690 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10691 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10692 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10693 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10694 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10695 /* 64 x 64 -> 128 */
10696 if (size == 3) {
10697 unallocated_encoding(s);
10698 return;
10699 }
10700 is_widening:
10701 if (!fp_access_check(s)) {
10702 return;
10703 }
10704
10705 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10706 break;
10707 default:
10708 /* opcode 15 not allocated */
10709 unallocated_encoding(s);
10710 break;
10711 }
10712 }
10713
10714 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10715 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10716 {
10717 int rd = extract32(insn, 0, 5);
10718 int rn = extract32(insn, 5, 5);
10719 int rm = extract32(insn, 16, 5);
10720 int size = extract32(insn, 22, 2);
10721 bool is_u = extract32(insn, 29, 1);
10722 bool is_q = extract32(insn, 30, 1);
10723
10724 if (!fp_access_check(s)) {
10725 return;
10726 }
10727
10728 switch (size + 4 * is_u) {
10729 case 0: /* AND */
10730 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10731 return;
10732 case 1: /* BIC */
10733 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10734 return;
10735 case 2: /* ORR */
10736 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10737 return;
10738 case 3: /* ORN */
10739 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10740 return;
10741 case 4: /* EOR */
10742 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10743 return;
10744
10745 case 5: /* BSL bitwise select */
10746 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10747 return;
10748 case 6: /* BIT, bitwise insert if true */
10749 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10750 return;
10751 case 7: /* BIF, bitwise insert if false */
10752 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10753 return;
10754
10755 default:
10756 g_assert_not_reached();
10757 }
10758 }
10759
10760 /* Pairwise op subgroup of C3.6.16.
10761 *
10762 * This is called directly, or via handle_3same_float for float pairwise
10763 * operations, where the opcode and size are calculated differently.
10764 */
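/* E.g. for ADDP V0.4S, V1.4S, V2.4S the result lanes are
 * { V1.S[0]+V1.S[1], V1.S[2]+V1.S[3], V2.S[0]+V2.S[1], V2.S[2]+V2.S[3] },
 * i.e. the low half of the output comes from rn and the high half
 * from rm.
 */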
10765 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10766 int size, int rn, int rm, int rd)
10767 {
10768 TCGv_ptr fpst;
10769 int pass;
10770
10771 /* Floating point operations need fpst */
10772 if (opcode >= 0x58) {
10773 fpst = get_fpstatus_ptr(false);
10774 } else {
10775 fpst = NULL;
10776 }
10777
10778 if (!fp_access_check(s)) {
10779 return;
10780 }
10781
10782 /* These operations work on the concatenated rm:rn, with each pair of
10783 * adjacent elements being operated on to produce an element in the result.
10784 */
10785 if (size == 3) {
10786 TCGv_i64 tcg_res[2];
10787
10788 for (pass = 0; pass < 2; pass++) {
10789 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10790 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10791 int passreg = (pass == 0) ? rn : rm;
10792
10793 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10794 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10795 tcg_res[pass] = tcg_temp_new_i64();
10796
10797 switch (opcode) {
10798 case 0x17: /* ADDP */
10799 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10800 break;
10801 case 0x58: /* FMAXNMP */
10802 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10803 break;
10804 case 0x5a: /* FADDP */
10805 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10806 break;
10807 case 0x5e: /* FMAXP */
10808 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10809 break;
10810 case 0x78: /* FMINNMP */
10811 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10812 break;
10813 case 0x7e: /* FMINP */
10814 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10815 break;
10816 default:
10817 g_assert_not_reached();
10818 }
10819
10820 tcg_temp_free_i64(tcg_op1);
10821 tcg_temp_free_i64(tcg_op2);
10822 }
10823
10824 for (pass = 0; pass < 2; pass++) {
10825 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10826 tcg_temp_free_i64(tcg_res[pass]);
10827 }
10828 } else {
10829 int maxpass = is_q ? 4 : 2;
10830 TCGv_i32 tcg_res[4];
10831
10832 for (pass = 0; pass < maxpass; pass++) {
10833 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10834 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10835 NeonGenTwoOpFn *genfn = NULL;
10836 int passreg = pass < (maxpass / 2) ? rn : rm;
10837 int passelt = (is_q && (pass & 1)) ? 2 : 0;
10838
10839 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10840 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10841 tcg_res[pass] = tcg_temp_new_i32();
10842
10843 switch (opcode) {
10844 case 0x17: /* ADDP */
10845 {
10846 static NeonGenTwoOpFn * const fns[3] = {
10847 gen_helper_neon_padd_u8,
10848 gen_helper_neon_padd_u16,
10849 tcg_gen_add_i32,
10850 };
10851 genfn = fns[size];
10852 break;
10853 }
10854 case 0x14: /* SMAXP, UMAXP */
10855 {
10856 static NeonGenTwoOpFn * const fns[3][2] = {
10857 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10858 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10859 { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10860 };
10861 genfn = fns[size][u];
10862 break;
10863 }
10864 case 0x15: /* SMINP, UMINP */
10865 {
10866 static NeonGenTwoOpFn * const fns[3][2] = {
10867 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10868 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10869 { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10870 };
10871 genfn = fns[size][u];
10872 break;
10873 }
10874 /* The FP operations are all on single floats (32-bit) */
10875 case 0x58: /* FMAXNMP */
10876 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10877 break;
10878 case 0x5a: /* FADDP */
10879 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10880 break;
10881 case 0x5e: /* FMAXP */
10882 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10883 break;
10884 case 0x78: /* FMINNMP */
10885 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10886 break;
10887 case 0x7e: /* FMINP */
10888 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10889 break;
10890 default:
10891 g_assert_not_reached();
10892 }
10893
10894 /* The FP ops above were called directly; for the integer ops, call genfn now */
10895 if (genfn) {
10896 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10897 }
10898
10899 tcg_temp_free_i32(tcg_op1);
10900 tcg_temp_free_i32(tcg_op2);
10901 }
10902
10903 for (pass = 0; pass < maxpass; pass++) {
10904 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10905 tcg_temp_free_i32(tcg_res[pass]);
10906 }
10907 clear_vec_high(s, is_q, rd);
10908 }
10909
10910 if (fpst) {
10911 tcg_temp_free_ptr(fpst);
10912 }
10913 }
10914
10915 /* Floating point op subgroup of C3.6.16. */
10916 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10917 {
10918 /* For floating point ops, the U, size[1] and opcode bits
10919 * together indicate the operation. size[0] indicates single
10920 * or double.
10921 */
10922 int fpopcode = extract32(insn, 11, 5)
10923 | (extract32(insn, 23, 1) << 5)
10924 | (extract32(insn, 29, 1) << 6);
10925 int is_q = extract32(insn, 30, 1);
10926 int size = extract32(insn, 22, 1);
10927 int rm = extract32(insn, 16, 5);
10928 int rn = extract32(insn, 5, 5);
10929 int rd = extract32(insn, 0, 5);
10930
10931 int datasize = is_q ? 128 : 64;
10932 int esize = 32 << size;
10933 int elements = datasize / esize;
10934
10935 if (size == 1 && !is_q) {
10936 unallocated_encoding(s);
10937 return;
10938 }
10939
10940 switch (fpopcode) {
10941 case 0x58: /* FMAXNMP */
10942 case 0x5a: /* FADDP */
10943 case 0x5e: /* FMAXP */
10944 case 0x78: /* FMINNMP */
10945 case 0x7e: /* FMINP */
10946 if (size && !is_q) {
10947 unallocated_encoding(s);
10948 return;
10949 }
10950 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10951 rn, rm, rd);
10952 return;
10953 case 0x1b: /* FMULX */
10954 case 0x1f: /* FRECPS */
10955 case 0x3f: /* FRSQRTS */
10956 case 0x5d: /* FACGE */
10957 case 0x7d: /* FACGT */
10958 case 0x19: /* FMLA */
10959 case 0x39: /* FMLS */
10960 case 0x18: /* FMAXNM */
10961 case 0x1a: /* FADD */
10962 case 0x1c: /* FCMEQ */
10963 case 0x1e: /* FMAX */
10964 case 0x38: /* FMINNM */
10965 case 0x3a: /* FSUB */
10966 case 0x3e: /* FMIN */
10967 case 0x5b: /* FMUL */
10968 case 0x5c: /* FCMGE */
10969 case 0x5f: /* FDIV */
10970 case 0x7a: /* FABD */
10971 case 0x7c: /* FCMGT */
10972 if (!fp_access_check(s)) {
10973 return;
10974 }
10975 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10976 return;
10977
10978 case 0x1d: /* FMLAL */
10979 case 0x3d: /* FMLSL */
10980 case 0x59: /* FMLAL2 */
10981 case 0x79: /* FMLSL2 */
10982 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
10983 unallocated_encoding(s);
10984 return;
10985 }
10986 if (fp_access_check(s)) {
10987 int is_s = extract32(insn, 23, 1);
10988 int is_2 = extract32(insn, 29, 1);
10989 int data = (is_2 << 1) | is_s;
10990 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
10991 vec_full_reg_offset(s, rn),
10992 vec_full_reg_offset(s, rm), cpu_env,
10993 is_q ? 16 : 8, vec_full_reg_size(s),
10994 data, gen_helper_gvec_fmlal_a64);
10995 }
10996 return;
10997
10998 default:
10999 unallocated_encoding(s);
11000 return;
11001 }
11002 }
11003
11004 /* Integer op subgroup of C3.6.16. */
11005 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11006 {
11007 int is_q = extract32(insn, 30, 1);
11008 int u = extract32(insn, 29, 1);
11009 int size = extract32(insn, 22, 2);
11010 int opcode = extract32(insn, 11, 5);
11011 int rm = extract32(insn, 16, 5);
11012 int rn = extract32(insn, 5, 5);
11013 int rd = extract32(insn, 0, 5);
11014 int pass;
11015 TCGCond cond;
11016
11017 switch (opcode) {
11018 case 0x13: /* MUL, PMUL */
11019 if (u && size != 0) {
11020 unallocated_encoding(s);
11021 return;
11022 }
11023 /* fall through */
11024 case 0x0: /* SHADD, UHADD */
11025 case 0x2: /* SRHADD, URHADD */
11026 case 0x4: /* SHSUB, UHSUB */
11027 case 0xc: /* SMAX, UMAX */
11028 case 0xd: /* SMIN, UMIN */
11029 case 0xe: /* SABD, UABD */
11030 case 0xf: /* SABA, UABA */
11031 case 0x12: /* MLA, MLS */
11032 if (size == 3) {
11033 unallocated_encoding(s);
11034 return;
11035 }
11036 break;
11037 case 0x16: /* SQDMULH, SQRDMULH */
11038 if (size == 0 || size == 3) {
11039 unallocated_encoding(s);
11040 return;
11041 }
11042 break;
11043 default:
11044 if (size == 3 && !is_q) {
11045 unallocated_encoding(s);
11046 return;
11047 }
11048 break;
11049 }
11050
11051 if (!fp_access_check(s)) {
11052 return;
11053 }
11054
11055 switch (opcode) {
11056 case 0x01: /* SQADD, UQADD */
11057 tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11058 offsetof(CPUARMState, vfp.qc),
11059 vec_full_reg_offset(s, rn),
11060 vec_full_reg_offset(s, rm),
11061 is_q ? 16 : 8, vec_full_reg_size(s),
11062 (u ? uqadd_op : sqadd_op) + size);
11063 return;
11064 case 0x05: /* SQSUB, UQSUB */
11065 tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11066 offsetof(CPUARMState, vfp.qc),
11067 vec_full_reg_offset(s, rn),
11068 vec_full_reg_offset(s, rm),
11069 is_q ? 16 : 8, vec_full_reg_size(s),
11070 (u ? uqsub_op : sqsub_op) + size);
11071 return;
11072 case 0x0c: /* SMAX, UMAX */
11073 if (u) {
11074 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11075 } else {
11076 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11077 }
11078 return;
11079 case 0x0d: /* SMIN, UMIN */
11080 if (u) {
11081 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11082 } else {
11083 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11084 }
11085 return;
11086 case 0x10: /* ADD, SUB */
11087 if (u) {
11088 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11089 } else {
11090 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11091 }
11092 return;
11093 case 0x13: /* MUL, PMUL */
11094 if (!u) { /* MUL */
11095 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11096 return;
11097 }
11098 break;
11099 case 0x12: /* MLA, MLS */
11100 if (u) {
11101 gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
11102 } else {
11103 gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
11104 }
11105 return;
11106 case 0x11:
11107 if (!u) { /* CMTST */
11108 gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
11109 return;
11110 }
11111 /* else CMEQ */
11112 cond = TCG_COND_EQ;
11113 goto do_gvec_cmp;
11114 case 0x06: /* CMGT, CMHI */
11115 cond = u ? TCG_COND_GTU : TCG_COND_GT;
11116 goto do_gvec_cmp;
11117 case 0x07: /* CMGE, CMHS */
11118 cond = u ? TCG_COND_GEU : TCG_COND_GE;
11119 do_gvec_cmp:
11120 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11121 vec_full_reg_offset(s, rn),
11122 vec_full_reg_offset(s, rm),
11123 is_q ? 16 : 8, vec_full_reg_size(s));
11124 return;
11125 }
11126
11127 if (size == 3) {
11128 assert(is_q);
11129 for (pass = 0; pass < 2; pass++) {
11130 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11131 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11132 TCGv_i64 tcg_res = tcg_temp_new_i64();
11133
11134 read_vec_element(s, tcg_op1, rn, pass, MO_64);
11135 read_vec_element(s, tcg_op2, rm, pass, MO_64);
11136
11137 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11138
11139 write_vec_element(s, tcg_res, rd, pass, MO_64);
11140
11141 tcg_temp_free_i64(tcg_res);
11142 tcg_temp_free_i64(tcg_op1);
11143 tcg_temp_free_i64(tcg_op2);
11144 }
11145 } else {
11146 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11147 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11148 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11149 TCGv_i32 tcg_res = tcg_temp_new_i32();
11150 NeonGenTwoOpFn *genfn = NULL;
11151 NeonGenTwoOpEnvFn *genenvfn = NULL;
11152
11153 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11154 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11155
11156 switch (opcode) {
11157 case 0x0: /* SHADD, UHADD */
11158 {
11159 static NeonGenTwoOpFn * const fns[3][2] = {
11160 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11161 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11162 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11163 };
11164 genfn = fns[size][u];
11165 break;
11166 }
11167 case 0x2: /* SRHADD, URHADD */
11168 {
11169 static NeonGenTwoOpFn * const fns[3][2] = {
11170 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11171 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11172 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11173 };
11174 genfn = fns[size][u];
11175 break;
11176 }
11177 case 0x4: /* SHSUB, UHSUB */
11178 {
11179 static NeonGenTwoOpFn * const fns[3][2] = {
11180 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11181 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11182 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11183 };
11184 genfn = fns[size][u];
11185 break;
11186 }
11187 case 0x8: /* SSHL, USHL */
11188 {
11189 static NeonGenTwoOpFn * const fns[3][2] = {
11190 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
11191 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
11192 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
11193 };
11194 genfn = fns[size][u];
11195 break;
11196 }
11197 case 0x9: /* SQSHL, UQSHL */
11198 {
11199 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11200 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11201 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11202 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11203 };
11204 genenvfn = fns[size][u];
11205 break;
11206 }
11207 case 0xa: /* SRSHL, URSHL */
11208 {
11209 static NeonGenTwoOpFn * const fns[3][2] = {
11210 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11211 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11212 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11213 };
11214 genfn = fns[size][u];
11215 break;
11216 }
11217 case 0xb: /* SQRSHL, UQRSHL */
11218 {
11219 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11220 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11221 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11222 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11223 };
11224 genenvfn = fns[size][u];
11225 break;
11226 }
11227 case 0xe: /* SABD, UABD */
11228 case 0xf: /* SABA, UABA */
11229 {
11230 static NeonGenTwoOpFn * const fns[3][2] = {
11231 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
11232 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
11233 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
11234 };
11235 genfn = fns[size][u];
11236 break;
11237 }
11238 case 0x13: /* MUL, PMUL */
11239 assert(u); /* PMUL */
11240 assert(size == 0);
11241 genfn = gen_helper_neon_mul_p8;
11242 break;
11243 case 0x16: /* SQDMULH, SQRDMULH */
11244 {
11245 static NeonGenTwoOpEnvFn * const fns[2][2] = {
11246 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
11247 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
11248 };
11249 assert(size == 1 || size == 2);
11250 genenvfn = fns[size - 1][u];
11251 break;
11252 }
11253 default:
11254 g_assert_not_reached();
11255 }
11256
11257 if (genenvfn) {
11258 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11259 } else {
11260 genfn(tcg_res, tcg_op1, tcg_op2);
11261 }
11262
11263 if (opcode == 0xf) {
11264 /* SABA, UABA: accumulating ops */
11265 static NeonGenTwoOpFn * const fns[3] = {
11266 gen_helper_neon_add_u8,
11267 gen_helper_neon_add_u16,
11268 tcg_gen_add_i32,
11269 };
11270
11271 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
11272 fns[size](tcg_res, tcg_op1, tcg_res);
11273 }
11274
11275 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11276
11277 tcg_temp_free_i32(tcg_res);
11278 tcg_temp_free_i32(tcg_op1);
11279 tcg_temp_free_i32(tcg_op2);
11280 }
11281 }
11282 clear_vec_high(s, is_q, rd);
11283 }
11284
11285 /* AdvSIMD three same
11286 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
11287 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11288 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
11289 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11290 */
11291 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11292 {
11293 int opcode = extract32(insn, 11, 5);
11294
11295 switch (opcode) {
11296 case 0x3: /* logic ops */
11297 disas_simd_3same_logic(s, insn);
11298 break;
11299 case 0x17: /* ADDP */
11300 case 0x14: /* SMAXP, UMAXP */
11301 case 0x15: /* SMINP, UMINP */
11302 {
11303 /* Pairwise operations */
11304 int is_q = extract32(insn, 30, 1);
11305 int u = extract32(insn, 29, 1);
11306 int size = extract32(insn, 22, 2);
11307 int rm = extract32(insn, 16, 5);
11308 int rn = extract32(insn, 5, 5);
11309 int rd = extract32(insn, 0, 5);
11310 if (opcode == 0x17) {
11311 if (u || (size == 3 && !is_q)) {
11312 unallocated_encoding(s);
11313 return;
11314 }
11315 } else {
11316 if (size == 3) {
11317 unallocated_encoding(s);
11318 return;
11319 }
11320 }
11321 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11322 break;
11323 }
11324 case 0x18 ... 0x31:
11325 /* floating point ops, sz[1] and U are part of opcode */
11326 disas_simd_3same_float(s, insn);
11327 break;
11328 default:
11329 disas_simd_3same_int(s, insn);
11330 break;
11331 }
11332 }
11333
11334 /*
11335 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11336 *
11337 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
11338 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11339 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
11340 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11341 *
11342 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11343 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11344 *
11345 */
11346 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11347 {
11348 int opcode, fpopcode;
11349 int is_q, u, a, rm, rn, rd;
11350 int datasize, elements;
11351 int pass;
11352 TCGv_ptr fpst;
11353 bool pairwise = false;
11354
11355 if (!dc_isar_feature(aa64_fp16, s)) {
11356 unallocated_encoding(s);
11357 return;
11358 }
11359
11360 if (!fp_access_check(s)) {
11361 return;
11362 }
11363
11364 /* For these floating point ops, the U, a and opcode bits
11365 * together indicate the operation.
11366 */
11367 opcode = extract32(insn, 11, 3);
11368 u = extract32(insn, 29, 1);
11369 a = extract32(insn, 23, 1);
11370 is_q = extract32(insn, 30, 1);
11371 rm = extract32(insn, 16, 5);
11372 rn = extract32(insn, 5, 5);
11373 rd = extract32(insn, 0, 5);
11374
11375 fpopcode = opcode | (a << 3) | (u << 4);
11376 datasize = is_q ? 128 : 64;
11377 elements = datasize / 16;
11378
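/* E.g. (illustrative) FADD has u = 0, a = 0, opcode = 0b010, giving
 * fpopcode 0x2, while its pairwise counterpart FADDP has u = 1 and
 * hence fpopcode 0x12; compare the case labels below.
 */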
11379 switch (fpopcode) {
11380 case 0x10: /* FMAXNMP */
11381 case 0x12: /* FADDP */
11382 case 0x16: /* FMAXP */
11383 case 0x18: /* FMINNMP */
11384 case 0x1e: /* FMINP */
11385 pairwise = true;
11386 break;
11387 }
11388
11389 fpst = get_fpstatus_ptr(true);
11390
11391 if (pairwise) {
11392 int maxpass = is_q ? 8 : 4;
11393 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11394 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11395 TCGv_i32 tcg_res[8];
11396
11397 for (pass = 0; pass < maxpass; pass++) {
11398 int passreg = pass < (maxpass / 2) ? rn : rm;
11399 int passelt = (pass << 1) & (maxpass - 1);
11400
11401 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11402 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11403 tcg_res[pass] = tcg_temp_new_i32();
11404
11405 switch (fpopcode) {
11406 case 0x10: /* FMAXNMP */
11407 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11408 fpst);
11409 break;
11410 case 0x12: /* FADDP */
11411 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11412 break;
11413 case 0x16: /* FMAXP */
11414 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11415 break;
11416 case 0x18: /* FMINNMP */
11417 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11418 fpst);
11419 break;
11420 case 0x1e: /* FMINP */
11421 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11422 break;
11423 default:
11424 g_assert_not_reached();
11425 }
11426 }
11427
11428 for (pass = 0; pass < maxpass; pass++) {
11429 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11430 tcg_temp_free_i32(tcg_res[pass]);
11431 }
11432
11433 tcg_temp_free_i32(tcg_op1);
11434 tcg_temp_free_i32(tcg_op2);
11435
11436 } else {
11437 for (pass = 0; pass < elements; pass++) {
11438 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11439 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11440 TCGv_i32 tcg_res = tcg_temp_new_i32();
11441
11442 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11443 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11444
11445 switch (fpopcode) {
11446 case 0x0: /* FMAXNM */
11447 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11448 break;
11449 case 0x1: /* FMLA */
11450 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11451 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11452 fpst);
11453 break;
11454 case 0x2: /* FADD */
11455 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11456 break;
11457 case 0x3: /* FMULX */
11458 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11459 break;
11460 case 0x4: /* FCMEQ */
11461 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11462 break;
11463 case 0x6: /* FMAX */
11464 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11465 break;
11466 case 0x7: /* FRECPS */
11467 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11468 break;
11469 case 0x8: /* FMINNM */
11470 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11471 break;
11472 case 0x9: /* FMLS */
11473 /* As usual for ARM, separate negation for fused multiply-add */
11474 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11475 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11476 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11477 fpst);
11478 break;
11479 case 0xa: /* FSUB */
11480 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11481 break;
11482 case 0xe: /* FMIN */
11483 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11484 break;
11485 case 0xf: /* FRSQRTS */
11486 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11487 break;
11488 case 0x13: /* FMUL */
11489 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11490 break;
11491 case 0x14: /* FCMGE */
11492 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11493 break;
11494 case 0x15: /* FACGE */
11495 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11496 break;
11497 case 0x17: /* FDIV */
11498 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11499 break;
11500 case 0x1a: /* FABD */
11501 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11502 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11503 break;
11504 case 0x1c: /* FCMGT */
11505 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11506 break;
11507 case 0x1d: /* FACGT */
11508 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11509 break;
11510 default:
11511 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11512 __func__, insn, fpopcode, s->pc_curr);
11513 g_assert_not_reached();
11514 }
11515
11516 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11517 tcg_temp_free_i32(tcg_res);
11518 tcg_temp_free_i32(tcg_op1);
11519 tcg_temp_free_i32(tcg_op2);
11520 }
11521 }
11522
11523 tcg_temp_free_ptr(fpst);
11524
11525 clear_vec_high(s, is_q, rd);
11526 }
11527
11528 /* AdvSIMD three same extra
11529 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
11530 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11531 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
11532 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11533 */
11534 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11535 {
11536 int rd = extract32(insn, 0, 5);
11537 int rn = extract32(insn, 5, 5);
11538 int opcode = extract32(insn, 11, 4);
11539 int rm = extract32(insn, 16, 5);
11540 int size = extract32(insn, 22, 2);
11541 bool u = extract32(insn, 29, 1);
11542 bool is_q = extract32(insn, 30, 1);
11543 bool feature;
11544 int rot;
11545
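/* The switch key packs U and opcode together (illustrative): SQRDMLAH
 * has u = 1, opcode = 0x0 and so is case 0x10 here, while UDOT
 * (u = 1, opcode = 0x2) is case 0x12.
 */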
11546 switch (u * 16 + opcode) {
11547 case 0x10: /* SQRDMLAH (vector) */
11548 case 0x11: /* SQRDMLSH (vector) */
11549 if (size != 1 && size != 2) {
11550 unallocated_encoding(s);
11551 return;
11552 }
11553 feature = dc_isar_feature(aa64_rdm, s);
11554 break;
11555 case 0x02: /* SDOT (vector) */
11556 case 0x12: /* UDOT (vector) */
11557 if (size != MO_32) {
11558 unallocated_encoding(s);
11559 return;
11560 }
11561 feature = dc_isar_feature(aa64_dp, s);
11562 break;
11563 case 0x18: /* FCMLA, #0 */
11564 case 0x19: /* FCMLA, #90 */
11565 case 0x1a: /* FCMLA, #180 */
11566 case 0x1b: /* FCMLA, #270 */
11567 case 0x1c: /* FCADD, #90 */
11568 case 0x1e: /* FCADD, #270 */
11569 if (size == 0
11570 || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11571 || (size == 3 && !is_q)) {
11572 unallocated_encoding(s);
11573 return;
11574 }
11575 feature = dc_isar_feature(aa64_fcma, s);
11576 break;
11577 default:
11578 unallocated_encoding(s);
11579 return;
11580 }
11581 if (!feature) {
11582 unallocated_encoding(s);
11583 return;
11584 }
11585 if (!fp_access_check(s)) {
11586 return;
11587 }
11588
11589 switch (opcode) {
11590 case 0x0: /* SQRDMLAH (vector) */
11591 switch (size) {
11592 case 1:
11593 gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11594 break;
11595 case 2:
11596 gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11597 break;
11598 default:
11599 g_assert_not_reached();
11600 }
11601 return;
11602
11603 case 0x1: /* SQRDMLSH (vector) */
11604 switch (size) {
11605 case 1:
11606 gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11607 break;
11608 case 2:
11609 gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11610 break;
11611 default:
11612 g_assert_not_reached();
11613 }
11614 return;
11615
11616 case 0x2: /* SDOT / UDOT */
11617 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11618 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11619 return;
11620
11621 case 0x8: /* FCMLA, #0 */
11622 case 0x9: /* FCMLA, #90 */
11623 case 0xa: /* FCMLA, #180 */
11624 case 0xb: /* FCMLA, #270 */
11625 rot = extract32(opcode, 0, 2);
11626 switch (size) {
11627 case 1:
11628 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11629 gen_helper_gvec_fcmlah);
11630 break;
11631 case 2:
11632 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11633 gen_helper_gvec_fcmlas);
11634 break;
11635 case 3:
11636 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11637 gen_helper_gvec_fcmlad);
11638 break;
11639 default:
11640 g_assert_not_reached();
11641 }
11642 return;
11643
11644 case 0xc: /* FCADD, #90 */
11645 case 0xe: /* FCADD, #270 */
11646 rot = extract32(opcode, 1, 1);
11647 switch (size) {
11648 case 1:
11649 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11650 gen_helper_gvec_fcaddh);
11651 break;
11652 case 2:
11653 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11654 gen_helper_gvec_fcadds);
11655 break;
11656 case 3:
11657 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11658 gen_helper_gvec_fcaddd);
11659 break;
11660 default:
11661 g_assert_not_reached();
11662 }
11663 return;
11664
11665 default:
11666 g_assert_not_reached();
11667 }
11668 }
11669
11670 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11671 int size, int rn, int rd)
11672 {
11673 /* Handle 2-reg-misc ops which are widening (so each size element
11674 * in the source becomes a 2*size element in the destination).
11675 * The only instruction like this is FCVTL.
11676 */
11677 int pass;
11678
11679 if (size == 3) {
11680 /* 32 -> 64 bit fp conversion */
11681 TCGv_i64 tcg_res[2];
11682 int srcelt = is_q ? 2 : 0;
11683
11684 for (pass = 0; pass < 2; pass++) {
11685 TCGv_i32 tcg_op = tcg_temp_new_i32();
11686 tcg_res[pass] = tcg_temp_new_i64();
11687
11688 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11689 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11690 tcg_temp_free_i32(tcg_op);
11691 }
11692 for (pass = 0; pass < 2; pass++) {
11693 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11694 tcg_temp_free_i64(tcg_res[pass]);
11695 }
11696 } else {
11697 /* 16 -> 32 bit fp conversion */
11698 int srcelt = is_q ? 4 : 0;
11699 TCGv_i32 tcg_res[4];
11700 TCGv_ptr fpst = get_fpstatus_ptr(false);
11701 TCGv_i32 ahp = get_ahp_flag();
11702
11703 for (pass = 0; pass < 4; pass++) {
11704 tcg_res[pass] = tcg_temp_new_i32();
11705
11706 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11707 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11708 fpst, ahp);
11709 }
11710 for (pass = 0; pass < 4; pass++) {
11711 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11712 tcg_temp_free_i32(tcg_res[pass]);
11713 }
11714
11715 tcg_temp_free_ptr(fpst);
11716 tcg_temp_free_i32(ahp);
11717 }
11718 }
11719
11720 static void handle_rev(DisasContext *s, int opcode, bool u,
11721 bool is_q, int size, int rn, int rd)
11722 {
11723 int op = (opcode << 1) | u;
11724 int opsz = op + size;
11725 int grp_size = 3 - opsz;
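/* op is 0/1/2 for REV64/REV32/REV16; a reversal group of 2^grp_size
 * elements spans 64 >> op bits, so the group must hold more than one
 * element, i.e. op + size < 3.
 */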
11726 int dsize = is_q ? 128 : 64;
11727 int i;
11728
11729 if (opsz >= 3) {
11730 unallocated_encoding(s);
11731 return;
11732 }
11733
11734 if (!fp_access_check(s)) {
11735 return;
11736 }
11737
11738 if (size == 0) {
11739 /* Special case bytes, use bswap op on each group of elements */
11740 int groups = dsize / (8 << grp_size);
11741
11742 for (i = 0; i < groups; i++) {
11743 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11744
11745 read_vec_element(s, tcg_tmp, rn, i, grp_size);
11746 switch (grp_size) {
11747 case MO_16:
11748 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11749 break;
11750 case MO_32:
11751 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11752 break;
11753 case MO_64:
11754 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11755 break;
11756 default:
11757 g_assert_not_reached();
11758 }
11759 write_vec_element(s, tcg_tmp, rd, i, grp_size);
11760 tcg_temp_free_i64(tcg_tmp);
11761 }
11762 clear_vec_high(s, is_q, rd);
11763 } else {
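/* Reverse the elements within each group by XORing the element index
 * with revmask: indices within a group differ only in their low
 * grp_size bits, so the result stays inside the group.
 */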
11764 int revmask = (1 << grp_size) - 1;
11765 int esize = 8 << size;
11766 int elements = dsize / esize;
11767 TCGv_i64 tcg_rn = tcg_temp_new_i64();
11768 TCGv_i64 tcg_rd = tcg_const_i64(0);
11769 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11770
11771 for (i = 0; i < elements; i++) {
11772 int e_rev = (i & 0xf) ^ revmask;
11773 int off = e_rev * esize;
11774 read_vec_element(s, tcg_rn, rn, i, size);
11775 if (off >= 64) {
11776 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11777 tcg_rn, off - 64, esize);
11778 } else {
11779 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11780 }
11781 }
11782 write_vec_element(s, tcg_rd, rd, 0, MO_64);
11783 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11784
11785 tcg_temp_free_i64(tcg_rd_hi);
11786 tcg_temp_free_i64(tcg_rd);
11787 tcg_temp_free_i64(tcg_rn);
11788 }
11789 }
11790
11791 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11792 bool is_q, int size, int rn, int rd)
11793 {
11794 /* Implement the pairwise operations from 2-misc:
11795 * SADDLP, UADDLP, SADALP, UADALP.
11796 * These all add pairs of elements in the input to produce a
11797 * double-width result element in the output (possibly accumulating).
11798 */
11799 bool accum = (opcode == 0x6);
11800 int maxpass = is_q ? 2 : 1;
11801 int pass;
11802 TCGv_i64 tcg_res[2];
11803
11804 if (size == 2) {
11805 /* 32 + 32 -> 64 op */
11806 MemOp memop = size + (u ? 0 : MO_SIGN);
11807
11808 for (pass = 0; pass < maxpass; pass++) {
11809 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11810 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11811
11812 tcg_res[pass] = tcg_temp_new_i64();
11813
11814 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11815 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11816 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11817 if (accum) {
11818 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11819 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11820 }
11821
11822 tcg_temp_free_i64(tcg_op1);
11823 tcg_temp_free_i64(tcg_op2);
11824 }
11825 } else {
11826 for (pass = 0; pass < maxpass; pass++) {
11827 TCGv_i64 tcg_op = tcg_temp_new_i64();
11828 NeonGenOneOpFn *genfn;
11829 static NeonGenOneOpFn * const fns[2][2] = {
11830 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
11831 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11832 };
11833
11834 genfn = fns[size][u];
11835
11836 tcg_res[pass] = tcg_temp_new_i64();
11837
11838 read_vec_element(s, tcg_op, rn, pass, MO_64);
11839 genfn(tcg_res[pass], tcg_op);
11840
11841 if (accum) {
11842 read_vec_element(s, tcg_op, rd, pass, MO_64);
11843 if (size == 0) {
11844 gen_helper_neon_addl_u16(tcg_res[pass],
11845 tcg_res[pass], tcg_op);
11846 } else {
11847 gen_helper_neon_addl_u32(tcg_res[pass],
11848 tcg_res[pass], tcg_op);
11849 }
11850 }
11851 tcg_temp_free_i64(tcg_op);
11852 }
11853 }
11854 if (!is_q) {
11855 tcg_res[1] = tcg_const_i64(0);
11856 }
11857 for (pass = 0; pass < 2; pass++) {
11858 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11859 tcg_temp_free_i64(tcg_res[pass]);
11860 }
11861 }
11862
11863 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11864 {
11865 /* Implement SHLL and SHLL2 */
11866 int pass;
11867 int part = is_q ? 2 : 0;
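/* SHLL2 (Q=1) widens the upper half of the source, so start reading
 * at 32-bit element 2; each widened element is then shifted left by
 * the source element size (8 << size).
 */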
11868 TCGv_i64 tcg_res[2];
11869
11870 for (pass = 0; pass < 2; pass++) {
11871 static NeonGenWidenFn * const widenfns[3] = {
11872 gen_helper_neon_widen_u8,
11873 gen_helper_neon_widen_u16,
11874 tcg_gen_extu_i32_i64,
11875 };
11876 NeonGenWidenFn *widenfn = widenfns[size];
11877 TCGv_i32 tcg_op = tcg_temp_new_i32();
11878
11879 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11880 tcg_res[pass] = tcg_temp_new_i64();
11881 widenfn(tcg_res[pass], tcg_op);
11882 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11883
11884 tcg_temp_free_i32(tcg_op);
11885 }
11886
11887 for (pass = 0; pass < 2; pass++) {
11888 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11889 tcg_temp_free_i64(tcg_res[pass]);
11890 }
11891 }
11892
11893 /* AdvSIMD two reg misc
11894 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
11895 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11896 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
11897 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11898 */
11899 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11900 {
11901 int size = extract32(insn, 22, 2);
11902 int opcode = extract32(insn, 12, 5);
11903 bool u = extract32(insn, 29, 1);
11904 bool is_q = extract32(insn, 30, 1);
11905 int rn = extract32(insn, 5, 5);
11906 int rd = extract32(insn, 0, 5);
11907 bool need_fpstatus = false;
11908 bool need_rmode = false;
11909 int rmode = -1;
11910 TCGv_i32 tcg_rmode;
11911 TCGv_ptr tcg_fpstatus;
11912
11913 switch (opcode) {
11914 case 0x0: /* REV64, REV32 */
11915 case 0x1: /* REV16 */
11916 handle_rev(s, opcode, u, is_q, size, rn, rd);
11917 return;
11918 case 0x5: /* CNT, NOT, RBIT */
11919 if (u && size == 0) {
11920 /* NOT */
11921 break;
11922 } else if (u && size == 1) {
11923 /* RBIT */
11924 break;
11925 } else if (!u && size == 0) {
11926 /* CNT */
11927 break;
11928 }
11929 unallocated_encoding(s);
11930 return;
11931 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11932 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11933 if (size == 3) {
11934 unallocated_encoding(s);
11935 return;
11936 }
11937 if (!fp_access_check(s)) {
11938 return;
11939 }
11940
11941 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11942 return;
11943 case 0x4: /* CLS, CLZ */
11944 if (size == 3) {
11945 unallocated_encoding(s);
11946 return;
11947 }
11948 break;
11949 case 0x2: /* SADDLP, UADDLP */
11950 case 0x6: /* SADALP, UADALP */
11951 if (size == 3) {
11952 unallocated_encoding(s);
11953 return;
11954 }
11955 if (!fp_access_check(s)) {
11956 return;
11957 }
11958 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11959 return;
11960 case 0x13: /* SHLL, SHLL2 */
11961 if (u == 0 || size == 3) {
11962 unallocated_encoding(s);
11963 return;
11964 }
11965 if (!fp_access_check(s)) {
11966 return;
11967 }
11968 handle_shll(s, is_q, size, rn, rd);
11969 return;
11970 case 0xa: /* CMLT */
11971 if (u == 1) {
11972 unallocated_encoding(s);
11973 return;
11974 }
11975 /* fall through */
11976 case 0x8: /* CMGT, CMGE */
11977 case 0x9: /* CMEQ, CMLE */
11978 case 0xb: /* ABS, NEG */
11979 if (size == 3 && !is_q) {
11980 unallocated_encoding(s);
11981 return;
11982 }
11983 break;
11984 case 0x3: /* SUQADD, USQADD */
11985 if (size == 3 && !is_q) {
11986 unallocated_encoding(s);
11987 return;
11988 }
11989 if (!fp_access_check(s)) {
11990 return;
11991 }
11992 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11993 return;
11994 case 0x7: /* SQABS, SQNEG */
11995 if (size == 3 && !is_q) {
11996 unallocated_encoding(s);
11997 return;
11998 }
11999 break;
12000 case 0xc ... 0xf:
12001 case 0x16 ... 0x1f:
12002 {
12003 /* Floating point: U, size[1] and opcode indicate operation;
12004 * size[0] indicates single or double precision.
12005 */
12006 int is_double = extract32(size, 0, 1);
12007 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12008 size = is_double ? 3 : 2;
12009 switch (opcode) {
12010 case 0x2f: /* FABS */
12011 case 0x6f: /* FNEG */
12012 if (size == 3 && !is_q) {
12013 unallocated_encoding(s);
12014 return;
12015 }
12016 break;
12017 case 0x1d: /* SCVTF */
12018 case 0x5d: /* UCVTF */
12019 {
12020 bool is_signed = (opcode == 0x1d);
12021 int elements = is_double ? 2 : is_q ? 4 : 2;
12022 if (is_double && !is_q) {
12023 unallocated_encoding(s);
12024 return;
12025 }
12026 if (!fp_access_check(s)) {
12027 return;
12028 }
12029 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12030 return;
12031 }
12032 case 0x2c: /* FCMGT (zero) */
12033 case 0x2d: /* FCMEQ (zero) */
12034 case 0x2e: /* FCMLT (zero) */
12035 case 0x6c: /* FCMGE (zero) */
12036 case 0x6d: /* FCMLE (zero) */
12037 if (size == 3 && !is_q) {
12038 unallocated_encoding(s);
12039 return;
12040 }
12041 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12042 return;
12043 case 0x7f: /* FSQRT */
12044 if (size == 3 && !is_q) {
12045 unallocated_encoding(s);
12046 return;
12047 }
12048 break;
12049 case 0x1a: /* FCVTNS */
12050 case 0x1b: /* FCVTMS */
12051 case 0x3a: /* FCVTPS */
12052 case 0x3b: /* FCVTZS */
12053 case 0x5a: /* FCVTNU */
12054 case 0x5b: /* FCVTMU */
12055 case 0x7a: /* FCVTPU */
12056 case 0x7b: /* FCVTZU */
12057 need_fpstatus = true;
12058 need_rmode = true;
12059 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
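/* opcode<5> and opcode<0> together select the rounding mode:
 * FCVTN* round to nearest even, FCVTM* toward -inf,
 * FCVTP* toward +inf and FCVTZ* toward zero.
 */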
12060 if (size == 3 && !is_q) {
12061 unallocated_encoding(s);
12062 return;
12063 }
12064 break;
12065 case 0x5c: /* FCVTAU */
12066 case 0x1c: /* FCVTAS */
12067 need_fpstatus = true;
12068 need_rmode = true;
12069 rmode = FPROUNDING_TIEAWAY;
12070 if (size == 3 && !is_q) {
12071 unallocated_encoding(s);
12072 return;
12073 }
12074 break;
12075 case 0x3c: /* URECPE */
12076 if (size == 3) {
12077 unallocated_encoding(s);
12078 return;
12079 }
12080 /* fall through */
12081 case 0x3d: /* FRECPE */
12082 case 0x7d: /* FRSQRTE */
12083 if (size == 3 && !is_q) {
12084 unallocated_encoding(s);
12085 return;
12086 }
12087 if (!fp_access_check(s)) {
12088 return;
12089 }
12090 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12091 return;
12092 case 0x56: /* FCVTXN, FCVTXN2 */
12093 if (size == 2) {
12094 unallocated_encoding(s);
12095 return;
12096 }
12097 /* fall through */
12098 case 0x16: /* FCVTN, FCVTN2 */
12099 /* handle_2misc_narrow does a 2*size -> size operation, but these
12100 * instructions encode the source size rather than dest size.
12101 */
12102 if (!fp_access_check(s)) {
12103 return;
12104 }
12105 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12106 return;
12107 case 0x17: /* FCVTL, FCVTL2 */
12108 if (!fp_access_check(s)) {
12109 return;
12110 }
12111 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12112 return;
12113 case 0x18: /* FRINTN */
12114 case 0x19: /* FRINTM */
12115 case 0x38: /* FRINTP */
12116 case 0x39: /* FRINTZ */
12117 need_rmode = true;
12118 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
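/* Same opcode-bit to rounding-mode mapping as the FCVT* group above. */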
12119 /* fall through */
12120 case 0x59: /* FRINTX */
12121 case 0x79: /* FRINTI */
12122 need_fpstatus = true;
12123 if (size == 3 && !is_q) {
12124 unallocated_encoding(s);
12125 return;
12126 }
12127 break;
12128 case 0x58: /* FRINTA */
12129 need_rmode = true;
12130 rmode = FPROUNDING_TIEAWAY;
12131 need_fpstatus = true;
12132 if (size == 3 && !is_q) {
12133 unallocated_encoding(s);
12134 return;
12135 }
12136 break;
12137 case 0x7c: /* URSQRTE */
12138 if (size == 3) {
12139 unallocated_encoding(s);
12140 return;
12141 }
12142 need_fpstatus = true;
12143 break;
12144 case 0x1e: /* FRINT32Z */
12145 case 0x1f: /* FRINT64Z */
12146 need_rmode = true;
12147 rmode = FPROUNDING_ZERO;
12148 /* fall through */
12149 case 0x5e: /* FRINT32X */
12150 case 0x5f: /* FRINT64X */
12151 need_fpstatus = true;
12152 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12153 unallocated_encoding(s);
12154 return;
12155 }
12156 break;
12157 default:
12158 unallocated_encoding(s);
12159 return;
12160 }
12161 break;
12162 }
12163 default:
12164 unallocated_encoding(s);
12165 return;
12166 }
12167
12168 if (!fp_access_check(s)) {
12169 return;
12170 }
12171
12172 if (need_fpstatus || need_rmode) {
12173 tcg_fpstatus = get_fpstatus_ptr(false);
12174 } else {
12175 tcg_fpstatus = NULL;
12176 }
12177 if (need_rmode) {
12178 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12179 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12180 } else {
12181 tcg_rmode = NULL;
12182 }
12183
12184 switch (opcode) {
12185 case 0x5:
12186 if (u && size == 0) { /* NOT */
12187 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12188 return;
12189 }
12190 break;
12191 case 0xb:
12192 if (u) { /* ABS, NEG */
12193 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12194 } else {
12195 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12196 }
12197 return;
12198 }
12199
12200 if (size == 3) {
12201 /* All 64-bit element operations can be shared with scalar 2misc */
12202 int pass;
12203
12204 /* Coverity claims (size == 3 && !is_q) has been eliminated
12205 * from all paths leading to here.
12206 */
12207 tcg_debug_assert(is_q);
12208 for (pass = 0; pass < 2; pass++) {
12209 TCGv_i64 tcg_op = tcg_temp_new_i64();
12210 TCGv_i64 tcg_res = tcg_temp_new_i64();
12211
12212 read_vec_element(s, tcg_op, rn, pass, MO_64);
12213
12214 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12215 tcg_rmode, tcg_fpstatus);
12216
12217 write_vec_element(s, tcg_res, rd, pass, MO_64);
12218
12219 tcg_temp_free_i64(tcg_res);
12220 tcg_temp_free_i64(tcg_op);
12221 }
12222 } else {
12223 int pass;
12224
12225 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12226 TCGv_i32 tcg_op = tcg_temp_new_i32();
12227 TCGv_i32 tcg_res = tcg_temp_new_i32();
12228 TCGCond cond;
12229
12230 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12231
12232 if (size == 2) {
12233 /* Special cases for 32 bit elements */
12234 switch (opcode) {
12235 case 0xa: /* CMLT */
12236 /* 32 bit integer comparison against zero, result is
12237 * test ? (2^32 - 1) : 0. We implement via setcond(test)
12238 * and negating the 0/1 result.
12239 */
12240 cond = TCG_COND_LT;
12241 do_cmop:
12242 tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12243 tcg_gen_neg_i32(tcg_res, tcg_res);
12244 break;
12245 case 0x8: /* CMGT, CMGE */
12246 cond = u ? TCG_COND_GE : TCG_COND_GT;
12247 goto do_cmop;
12248 case 0x9: /* CMEQ, CMLE */
12249 cond = u ? TCG_COND_LE : TCG_COND_EQ;
12250 goto do_cmop;
12251 case 0x4: /* CLS */
12252 if (u) {
12253 tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12254 } else {
12255 tcg_gen_clrsb_i32(tcg_res, tcg_op);
12256 }
12257 break;
12258 case 0x7: /* SQABS, SQNEG */
12259 if (u) {
12260 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12261 } else {
12262 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12263 }
12264 break;
12265 case 0x2f: /* FABS */
12266 gen_helper_vfp_abss(tcg_res, tcg_op);
12267 break;
12268 case 0x6f: /* FNEG */
12269 gen_helper_vfp_negs(tcg_res, tcg_op);
12270 break;
12271 case 0x7f: /* FSQRT */
12272 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12273 break;
12274 case 0x1a: /* FCVTNS */
12275 case 0x1b: /* FCVTMS */
12276 case 0x1c: /* FCVTAS */
12277 case 0x3a: /* FCVTPS */
12278 case 0x3b: /* FCVTZS */
12279 {
12280 TCGv_i32 tcg_shift = tcg_const_i32(0);
12281 gen_helper_vfp_tosls(tcg_res, tcg_op,
12282 tcg_shift, tcg_fpstatus);
12283 tcg_temp_free_i32(tcg_shift);
12284 break;
12285 }
12286 case 0x5a: /* FCVTNU */
12287 case 0x5b: /* FCVTMU */
12288 case 0x5c: /* FCVTAU */
12289 case 0x7a: /* FCVTPU */
12290 case 0x7b: /* FCVTZU */
12291 {
12292 TCGv_i32 tcg_shift = tcg_const_i32(0);
12293 gen_helper_vfp_touls(tcg_res, tcg_op,
12294 tcg_shift, tcg_fpstatus);
12295 tcg_temp_free_i32(tcg_shift);
12296 break;
12297 }
12298 case 0x18: /* FRINTN */
12299 case 0x19: /* FRINTM */
12300 case 0x38: /* FRINTP */
12301 case 0x39: /* FRINTZ */
12302 case 0x58: /* FRINTA */
12303 case 0x79: /* FRINTI */
12304 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12305 break;
12306 case 0x59: /* FRINTX */
12307 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12308 break;
12309 case 0x7c: /* URSQRTE */
12310 gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12311 break;
12312 case 0x1e: /* FRINT32Z */
12313 case 0x5e: /* FRINT32X */
12314 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12315 break;
12316 case 0x1f: /* FRINT64Z */
12317 case 0x5f: /* FRINT64X */
12318 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12319 break;
12320 default:
12321 g_assert_not_reached();
12322 }
12323 } else {
12324 /* Use helpers for 8 and 16 bit elements */
12325 switch (opcode) {
12326 case 0x5: /* CNT, RBIT */
12327 /* For these two insns size is part of the opcode specifier
12328 * (handled earlier); they always operate on byte elements.
12329 */
12330 if (u) {
12331 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12332 } else {
12333 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12334 }
12335 break;
12336 case 0x7: /* SQABS, SQNEG */
12337 {
12338 NeonGenOneOpEnvFn *genfn;
12339 static NeonGenOneOpEnvFn * const fns[2][2] = {
12340 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12341 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12342 };
12343 genfn = fns[size][u];
12344 genfn(tcg_res, cpu_env, tcg_op);
12345 break;
12346 }
12347 case 0x8: /* CMGT, CMGE */
12348 case 0x9: /* CMEQ, CMLE */
12349 case 0xa: /* CMLT */
12350 {
12351 static NeonGenTwoOpFn * const fns[3][2] = {
12352 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12353 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12354 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12355 };
12356 NeonGenTwoOpFn *genfn;
12357 int comp;
12358 bool reverse;
12359 TCGv_i32 tcg_zero = tcg_const_i32(0);
12360
12361 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12362 comp = (opcode - 0x8) * 2 + u;
12363 /* ...but LE, LT are implemented as reverse GE, GT */
12364 reverse = (comp > 2);
12365 if (reverse) {
12366 comp = 4 - comp;
12367 }
12368 genfn = fns[comp][size];
12369 if (reverse) {
12370 genfn(tcg_res, tcg_zero, tcg_op);
12371 } else {
12372 genfn(tcg_res, tcg_op, tcg_zero);
12373 }
12374 tcg_temp_free_i32(tcg_zero);
12375 break;
12376 }
12377 case 0x4: /* CLS, CLZ */
12378 if (u) {
12379 if (size == 0) {
12380 gen_helper_neon_clz_u8(tcg_res, tcg_op);
12381 } else {
12382 gen_helper_neon_clz_u16(tcg_res, tcg_op);
12383 }
12384 } else {
12385 if (size == 0) {
12386 gen_helper_neon_cls_s8(tcg_res, tcg_op);
12387 } else {
12388 gen_helper_neon_cls_s16(tcg_res, tcg_op);
12389 }
12390 }
12391 break;
12392 default:
12393 g_assert_not_reached();
12394 }
12395 }
12396
12397 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12398
12399 tcg_temp_free_i32(tcg_res);
12400 tcg_temp_free_i32(tcg_op);
12401 }
12402 }
12403 clear_vec_high(s, is_q, rd);
12404
12405 if (need_rmode) {
12406 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12407 tcg_temp_free_i32(tcg_rmode);
12408 }
12409 if (need_fpstatus) {
12410 tcg_temp_free_ptr(tcg_fpstatus);
12411 }
12412 }
12413
12414 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12415 *
12416 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
12417 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12418 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
12419 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12420 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12421 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12422 *
12423 * This actually covers two groups where scalar access is governed by
12424 * bit 28. A bunch of the instructions (float to integral) only exist
12425 * in the vector form and are unallocated for the scalar decode. Also
12426 * in the scalar decode Q is always 1.
12427 */
12428 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12429 {
12430 int fpop, opcode, a, u;
12431 int rn, rd;
12432 bool is_q;
12433 bool is_scalar;
12434 bool only_in_vector = false;
12435
12436 int pass;
12437 TCGv_i32 tcg_rmode = NULL;
12438 TCGv_ptr tcg_fpstatus = NULL;
12439 bool need_rmode = false;
12440 bool need_fpst = true;
12441 int rmode;
12442
12443 if (!dc_isar_feature(aa64_fp16, s)) {
12444 unallocated_encoding(s);
12445 return;
12446 }
12447
12448 rd = extract32(insn, 0, 5);
12449 rn = extract32(insn, 5, 5);
12450
12451 a = extract32(insn, 23, 1);
12452 u = extract32(insn, 29, 1);
12453 is_scalar = extract32(insn, 28, 1);
12454 is_q = extract32(insn, 30, 1);
12455
12456 opcode = extract32(insn, 12, 5);
12457 fpop = deposit32(opcode, 5, 1, a);
12458 fpop = deposit32(fpop, 6, 1, u);
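/* fpop is u:a:opcode, the same extended opcode space that
 * disas_simd_two_reg_misc builds from U and size[1] (a is bit 23,
 * which is size[1]), so the case values below line up with it.
 */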
12459
12463 switch (fpop) {
12464 case 0x1d: /* SCVTF */
12465 case 0x5d: /* UCVTF */
12466 {
12467 int elements;
12468
12469 if (is_scalar) {
12470 elements = 1;
12471 } else {
12472 elements = (is_q ? 8 : 4);
12473 }
12474
12475 if (!fp_access_check(s)) {
12476 return;
12477 }
12478 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12479 return;
12480 }
12482 case 0x2c: /* FCMGT (zero) */
12483 case 0x2d: /* FCMEQ (zero) */
12484 case 0x2e: /* FCMLT (zero) */
12485 case 0x6c: /* FCMGE (zero) */
12486 case 0x6d: /* FCMLE (zero) */
12487 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12488 return;
12489 case 0x3d: /* FRECPE */
12490 case 0x3f: /* FRECPX */
12491 break;
12492 case 0x18: /* FRINTN */
12493 need_rmode = true;
12494 only_in_vector = true;
12495 rmode = FPROUNDING_TIEEVEN;
12496 break;
12497 case 0x19: /* FRINTM */
12498 need_rmode = true;
12499 only_in_vector = true;
12500 rmode = FPROUNDING_NEGINF;
12501 break;
12502 case 0x38: /* FRINTP */
12503 need_rmode = true;
12504 only_in_vector = true;
12505 rmode = FPROUNDING_POSINF;
12506 break;
12507 case 0x39: /* FRINTZ */
12508 need_rmode = true;
12509 only_in_vector = true;
12510 rmode = FPROUNDING_ZERO;
12511 break;
12512 case 0x58: /* FRINTA */
12513 need_rmode = true;
12514 only_in_vector = true;
12515 rmode = FPROUNDING_TIEAWAY;
12516 break;
12517 case 0x59: /* FRINTX */
12518 case 0x79: /* FRINTI */
12519 only_in_vector = true;
12520 /* current rounding mode */
12521 break;
12522 case 0x1a: /* FCVTNS */
12523 need_rmode = true;
12524 rmode = FPROUNDING_TIEEVEN;
12525 break;
12526 case 0x1b: /* FCVTMS */
12527 need_rmode = true;
12528 rmode = FPROUNDING_NEGINF;
12529 break;
12530 case 0x1c: /* FCVTAS */
12531 need_rmode = true;
12532 rmode = FPROUNDING_TIEAWAY;
12533 break;
12534 case 0x3a: /* FCVTPS */
12535 need_rmode = true;
12536 rmode = FPROUNDING_POSINF;
12537 break;
12538 case 0x3b: /* FCVTZS */
12539 need_rmode = true;
12540 rmode = FPROUNDING_ZERO;
12541 break;
12542 case 0x5a: /* FCVTNU */
12543 need_rmode = true;
12544 rmode = FPROUNDING_TIEEVEN;
12545 break;
12546 case 0x5b: /* FCVTMU */
12547 need_rmode = true;
12548 rmode = FPROUNDING_NEGINF;
12549 break;
12550 case 0x5c: /* FCVTAU */
12551 need_rmode = true;
12552 rmode = FPROUNDING_TIEAWAY;
12553 break;
12554 case 0x7a: /* FCVTPU */
12555 need_rmode = true;
12556 rmode = FPROUNDING_POSINF;
12557 break;
12558 case 0x7b: /* FCVTZU */
12559 need_rmode = true;
12560 rmode = FPROUNDING_ZERO;
12561 break;
12562 case 0x2f: /* FABS */
12563 case 0x6f: /* FNEG */
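/* FABS and FNEG only clear or flip the f16 sign bit (bit 15),
 * so no fp status is needed; see the andi/xori below.
 */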
12564 need_fpst = false;
12565 break;
12566 case 0x7d: /* FRSQRTE */
12567 case 0x7f: /* FSQRT (vector) */
12568 break;
12569 default:
12570 fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12571 g_assert_not_reached();
12572 }
12573
12575 /* Check additional constraints for the scalar encoding */
12576 if (is_scalar) {
12577 if (!is_q) {
12578 unallocated_encoding(s);
12579 return;
12580 }
12581 /* FRINTxx is only in the vector form */
12582 if (only_in_vector) {
12583 unallocated_encoding(s);
12584 return;
12585 }
12586 }
12587
12588 if (!fp_access_check(s)) {
12589 return;
12590 }
12591
12592 if (need_rmode || need_fpst) {
12593 tcg_fpstatus = get_fpstatus_ptr(true);
12594 }
12595
12596 if (need_rmode) {
12597 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12598 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12599 }
12600
12601 if (is_scalar) {
12602 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12603 TCGv_i32 tcg_res = tcg_temp_new_i32();
12604
12605 switch (fpop) {
12606 case 0x1a: /* FCVTNS */
12607 case 0x1b: /* FCVTMS */
12608 case 0x1c: /* FCVTAS */
12609 case 0x3a: /* FCVTPS */
12610 case 0x3b: /* FCVTZS */
12611 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12612 break;
12613 case 0x3d: /* FRECPE */
12614 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12615 break;
12616 case 0x3f: /* FRECPX */
12617 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12618 break;
12619 case 0x5a: /* FCVTNU */
12620 case 0x5b: /* FCVTMU */
12621 case 0x5c: /* FCVTAU */
12622 case 0x7a: /* FCVTPU */
12623 case 0x7b: /* FCVTZU */
12624 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12625 break;
12626 case 0x6f: /* FNEG */
12627 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12628 break;
12629 case 0x7d: /* FRSQRTE */
12630 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12631 break;
12632 default:
12633 g_assert_not_reached();
12634 }
12635
12636 /* limit any sign extension going on */
12637 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12638 write_fp_sreg(s, rd, tcg_res);
12639
12640 tcg_temp_free_i32(tcg_res);
12641 tcg_temp_free_i32(tcg_op);
12642 } else {
12643 for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12644 TCGv_i32 tcg_op = tcg_temp_new_i32();
12645 TCGv_i32 tcg_res = tcg_temp_new_i32();
12646
12647 read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12648
12649 switch (fpop) {
12650 case 0x1a: /* FCVTNS */
12651 case 0x1b: /* FCVTMS */
12652 case 0x1c: /* FCVTAS */
12653 case 0x3a: /* FCVTPS */
12654 case 0x3b: /* FCVTZS */
12655 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12656 break;
12657 case 0x3d: /* FRECPE */
12658 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12659 break;
12660 case 0x5a: /* FCVTNU */
12661 case 0x5b: /* FCVTMU */
12662 case 0x5c: /* FCVTAU */
12663 case 0x7a: /* FCVTPU */
12664 case 0x7b: /* FCVTZU */
12665 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12666 break;
12667 case 0x18: /* FRINTN */
12668 case 0x19: /* FRINTM */
12669 case 0x38: /* FRINTP */
12670 case 0x39: /* FRINTZ */
12671 case 0x58: /* FRINTA */
12672 case 0x79: /* FRINTI */
12673 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12674 break;
12675 case 0x59: /* FRINTX */
12676 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12677 break;
12678 case 0x2f: /* FABS */
12679 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12680 break;
12681 case 0x6f: /* FNEG */
12682 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12683 break;
12684 case 0x7d: /* FRSQRTE */
12685 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12686 break;
12687 case 0x7f: /* FSQRT */
12688 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12689 break;
12690 default:
12691 g_assert_not_reached();
12692 }
12693
12694 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12695
12696 tcg_temp_free_i32(tcg_res);
12697 tcg_temp_free_i32(tcg_op);
12698 }
12699
12700 clear_vec_high(s, is_q, rd);
12701 }
12702
12703 if (tcg_rmode) {
12704 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12705 tcg_temp_free_i32(tcg_rmode);
12706 }
12707
12708 if (tcg_fpstatus) {
12709 tcg_temp_free_ptr(tcg_fpstatus);
12710 }
12711 }
12712
12713 /* AdvSIMD scalar x indexed element
12714 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12715 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12716 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12717 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12718 * AdvSIMD vector x indexed element
12719 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12720 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12721 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12722 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12723 */
12724 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12725 {
12726 /* This encoding has two kinds of instruction:
12727 * normal, where we perform elt x idxelt => elt for each
12728 * element in the vector
12729 * long, where we perform elt x idxelt and generate a result of
12730 * double the width of the input element
12731 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12732 */
12733 bool is_scalar = extract32(insn, 28, 1);
12734 bool is_q = extract32(insn, 30, 1);
12735 bool u = extract32(insn, 29, 1);
12736 int size = extract32(insn, 22, 2);
12737 int l = extract32(insn, 21, 1);
12738 int m = extract32(insn, 20, 1);
12739 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12740 int rm = extract32(insn, 16, 4);
12741 int opcode = extract32(insn, 12, 4);
12742 int h = extract32(insn, 11, 1);
12743 int rn = extract32(insn, 5, 5);
12744 int rd = extract32(insn, 0, 5);
12745 bool is_long = false;
12746 int is_fp = 0;
12747 bool is_fp16 = false;
12748 int index;
12749 TCGv_ptr fpst;
12750
12751 switch (16 * u + opcode) {
12752 case 0x08: /* MUL */
12753 case 0x10: /* MLA */
12754 case 0x14: /* MLS */
12755 if (is_scalar) {
12756 unallocated_encoding(s);
12757 return;
12758 }
12759 break;
12760 case 0x02: /* SMLAL, SMLAL2 */
12761 case 0x12: /* UMLAL, UMLAL2 */
12762 case 0x06: /* SMLSL, SMLSL2 */
12763 case 0x16: /* UMLSL, UMLSL2 */
12764 case 0x0a: /* SMULL, SMULL2 */
12765 case 0x1a: /* UMULL, UMULL2 */
12766 if (is_scalar) {
12767 unallocated_encoding(s);
12768 return;
12769 }
12770 is_long = true;
12771 break;
12772 case 0x03: /* SQDMLAL, SQDMLAL2 */
12773 case 0x07: /* SQDMLSL, SQDMLSL2 */
12774 case 0x0b: /* SQDMULL, SQDMULL2 */
12775 is_long = true;
12776 break;
12777 case 0x0c: /* SQDMULH */
12778 case 0x0d: /* SQRDMULH */
12779 break;
12780 case 0x01: /* FMLA */
12781 case 0x05: /* FMLS */
12782 case 0x09: /* FMUL */
12783 case 0x19: /* FMULX */
12784 is_fp = 1;
12785 break;
12786 case 0x1d: /* SQRDMLAH */
12787 case 0x1f: /* SQRDMLSH */
12788 if (!dc_isar_feature(aa64_rdm, s)) {
12789 unallocated_encoding(s);
12790 return;
12791 }
12792 break;
12793 case 0x0e: /* SDOT */
12794 case 0x1e: /* UDOT */
12795 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12796 unallocated_encoding(s);
12797 return;
12798 }
12799 break;
12800 case 0x11: /* FCMLA #0 */
12801 case 0x13: /* FCMLA #90 */
12802 case 0x15: /* FCMLA #180 */
12803 case 0x17: /* FCMLA #270 */
12804 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12805 unallocated_encoding(s);
12806 return;
12807 }
12808 is_fp = 2;
12809 break;
12810 case 0x00: /* FMLAL */
12811 case 0x04: /* FMLSL */
12812 case 0x18: /* FMLAL2 */
12813 case 0x1c: /* FMLSL2 */
12814 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12815 unallocated_encoding(s);
12816 return;
12817 }
12818 size = MO_16;
12819 /* is_fp, but we pass cpu_env not fp_status. */
12820 break;
12821 default:
12822 unallocated_encoding(s);
12823 return;
12824 }
12825
12826 switch (is_fp) {
12827 case 1: /* normal fp */
12828 /* convert insn encoded size to MemOp size */
12829 switch (size) {
12830 case 0: /* half-precision */
12831 size = MO_16;
12832 is_fp16 = true;
12833 break;
12834 case MO_32: /* single precision */
12835 case MO_64: /* double precision */
12836 break;
12837 default:
12838 unallocated_encoding(s);
12839 return;
12840 }
12841 break;
12842
12843 case 2: /* complex fp */
12844 /* Each indexable element is a complex pair. */
12845 size += 1;
12846 switch (size) {
12847 case MO_32:
12848 if (h && !is_q) {
12849 unallocated_encoding(s);
12850 return;
12851 }
12852 is_fp16 = true;
12853 break;
12854 case MO_64:
12855 break;
12856 default:
12857 unallocated_encoding(s);
12858 return;
12859 }
12860 break;
12861
12862 default: /* integer */
12863 switch (size) {
12864 case MO_8:
12865 case MO_64:
12866 unallocated_encoding(s);
12867 return;
12868 }
12869 break;
12870 }
12871 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12872 unallocated_encoding(s);
12873 return;
12874 }
12875
12876 /* Given MemOp size, adjust register and indexing. */
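/* For MO_16 the index is H:L:M and Rm stays 4 bits; for MO_32 the
 * index is H:L and M becomes bit 4 of Rm; for MO_64 the index is H
 * alone and L must be zero.
 */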
12877 switch (size) {
12878 case MO_16:
12879 index = h << 2 | l << 1 | m;
12880 break;
12881 case MO_32:
12882 index = h << 1 | l;
12883 rm |= m << 4;
12884 break;
12885 case MO_64:
12886 if (l || !is_q) {
12887 unallocated_encoding(s);
12888 return;
12889 }
12890 index = h;
12891 rm |= m << 4;
12892 break;
12893 default:
12894 g_assert_not_reached();
12895 }
12896
12897 if (!fp_access_check(s)) {
12898 return;
12899 }
12900
12901 if (is_fp) {
12902 fpst = get_fpstatus_ptr(is_fp16);
12903 } else {
12904 fpst = NULL;
12905 }
12906
12907 switch (16 * u + opcode) {
12908 case 0x0e: /* SDOT */
12909 case 0x1e: /* UDOT */
12910 gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12911 u ? gen_helper_gvec_udot_idx_b
12912 : gen_helper_gvec_sdot_idx_b);
12913 return;
12914 case 0x11: /* FCMLA #0 */
12915 case 0x13: /* FCMLA #90 */
12916 case 0x15: /* FCMLA #180 */
12917 case 0x17: /* FCMLA #270 */
12918 {
12919 int rot = extract32(insn, 13, 2);
12920 int data = (index << 2) | rot;
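/* Pack the element index and the rotation into the helper's data operand. */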
12921 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12922 vec_full_reg_offset(s, rn),
12923 vec_full_reg_offset(s, rm), fpst,
12924 is_q ? 16 : 8, vec_full_reg_size(s), data,
12925 size == MO_64
12926 ? gen_helper_gvec_fcmlas_idx
12927 : gen_helper_gvec_fcmlah_idx);
12928 tcg_temp_free_ptr(fpst);
12929 }
12930 return;
12931
12932 case 0x00: /* FMLAL */
12933 case 0x04: /* FMLSL */
12934 case 0x18: /* FMLAL2 */
12935 case 0x1c: /* FMLSL2 */
12936 {
12937 int is_s = extract32(opcode, 2, 1);
12938 int is_2 = u;
12939 int data = (index << 2) | (is_2 << 1) | is_s;
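/* Pack the index, the 2-variant flag and the subtract flag into the
 * helper's data operand.
 */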
12940 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12941 vec_full_reg_offset(s, rn),
12942 vec_full_reg_offset(s, rm), cpu_env,
12943 is_q ? 16 : 8, vec_full_reg_size(s),
12944 data, gen_helper_gvec_fmlal_idx_a64);
12945 }
12946 return;
12947 }
12948
12949 if (size == 3) {
12950 TCGv_i64 tcg_idx = tcg_temp_new_i64();
12951 int pass;
12952
12953 assert(is_fp && is_q && !is_long);
12954
12955 read_vec_element(s, tcg_idx, rm, index, MO_64);
12956
12957 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12958 TCGv_i64 tcg_op = tcg_temp_new_i64();
12959 TCGv_i64 tcg_res = tcg_temp_new_i64();
12960
12961 read_vec_element(s, tcg_op, rn, pass, MO_64);
12962
12963 switch (16 * u + opcode) {
12964 case 0x05: /* FMLS */
12965 /* As usual for ARM, separate negation for fused multiply-add */
12966 gen_helper_vfp_negd(tcg_op, tcg_op);
12967 /* fall through */
12968 case 0x01: /* FMLA */
12969 read_vec_element(s, tcg_res, rd, pass, MO_64);
12970 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12971 break;
12972 case 0x09: /* FMUL */
12973 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12974 break;
12975 case 0x19: /* FMULX */
12976 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12977 break;
12978 default:
12979 g_assert_not_reached();
12980 }
12981
12982 write_vec_element(s, tcg_res, rd, pass, MO_64);
12983 tcg_temp_free_i64(tcg_op);
12984 tcg_temp_free_i64(tcg_res);
12985 }
12986
12987 tcg_temp_free_i64(tcg_idx);
12988 clear_vec_high(s, !is_scalar, rd);
12989 } else if (!is_long) {
12990 /* 32 bit floating point, or 16 or 32 bit integer.
12991 * For the 16 bit scalar case we use the usual Neon helpers and
12992 * rely on the fact that 0 op 0 == 0 with no side effects.
12993 */
12994 TCGv_i32 tcg_idx = tcg_temp_new_i32();
12995 int pass, maxpasses;
12996
12997 if (is_scalar) {
12998 maxpasses = 1;
12999 } else {
13000 maxpasses = is_q ? 4 : 2;
13001 }
13002
13003 read_vec_element_i32(s, tcg_idx, rm, index, size);
13004
13005 if (size == 1 && !is_scalar) {
13006 /* The simplest way to handle the 16x16 indexed ops is to duplicate
13007 * the index into both halves of the 32 bit tcg_idx and then use
13008 * the usual Neon helpers.
13009 */
13010 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13011 }
13012
13013 for (pass = 0; pass < maxpasses; pass++) {
13014 TCGv_i32 tcg_op = tcg_temp_new_i32();
13015 TCGv_i32 tcg_res = tcg_temp_new_i32();
13016
13017 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13018
13019 switch (16 * u + opcode) {
13020 case 0x08: /* MUL */
13021 case 0x10: /* MLA */
13022 case 0x14: /* MLS */
13023 {
13024 static NeonGenTwoOpFn * const fns[2][2] = {
13025 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13026 { tcg_gen_add_i32, tcg_gen_sub_i32 },
13027 };
13028 NeonGenTwoOpFn *genfn;
13029 bool is_sub = (opcode == 0x4);
13030
13031 if (size == 1) {
13032 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13033 } else {
13034 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13035 }
13036 if (opcode == 0x8) {
13037 break;
13038 }
13039 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13040 genfn = fns[size - 1][is_sub];
13041 genfn(tcg_res, tcg_op, tcg_res);
13042 break;
13043 }
13044 case 0x05: /* FMLS */
13045 case 0x01: /* FMLA */
13046 read_vec_element_i32(s, tcg_res, rd, pass,
13047 is_scalar ? size : MO_32);
13048 switch (size) {
13049 case 1:
13050 if (opcode == 0x5) {
13051 /* As usual for ARM, separate negation for fused
13052 * multiply-add */
13053 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13054 }
13055 if (is_scalar) {
13056 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13057 tcg_res, fpst);
13058 } else {
13059 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13060 tcg_res, fpst);
13061 }
13062 break;
13063 case 2:
13064 if (opcode == 0x5) {
13065 /* As usual for ARM, separate negation for
13066 * fused multiply-add */
13067 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13068 }
13069 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13070 tcg_res, fpst);
13071 break;
13072 default:
13073 g_assert_not_reached();
13074 }
13075 break;
13076 case 0x09: /* FMUL */
13077 switch (size) {
13078 case 1:
13079 if (is_scalar) {
13080 gen_helper_advsimd_mulh(tcg_res, tcg_op,
13081 tcg_idx, fpst);
13082 } else {
13083 gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13084 tcg_idx, fpst);
13085 }
13086 break;
13087 case 2:
13088 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13089 break;
13090 default:
13091 g_assert_not_reached();
13092 }
13093 break;
13094 case 0x19: /* FMULX */
13095 switch (size) {
13096 case 1:
13097 if (is_scalar) {
13098 gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13099 tcg_idx, fpst);
13100 } else {
13101 gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13102 tcg_idx, fpst);
13103 }
13104 break;
13105 case 2:
13106 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13107 break;
13108 default:
13109 g_assert_not_reached();
13110 }
13111 break;
13112 case 0x0c: /* SQDMULH */
13113 if (size == 1) {
13114 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13115 tcg_op, tcg_idx);
13116 } else {
13117 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13118 tcg_op, tcg_idx);
13119 }
13120 break;
13121 case 0x0d: /* SQRDMULH */
13122 if (size == 1) {
13123 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13124 tcg_op, tcg_idx);
13125 } else {
13126 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13127 tcg_op, tcg_idx);
13128 }
13129 break;
13130 case 0x1d: /* SQRDMLAH */
13131 read_vec_element_i32(s, tcg_res, rd, pass,
13132 is_scalar ? size : MO_32);
13133 if (size == 1) {
13134 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13135 tcg_op, tcg_idx, tcg_res);
13136 } else {
13137 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13138 tcg_op, tcg_idx, tcg_res);
13139 }
13140 break;
13141 case 0x1f: /* SQRDMLSH */
13142 read_vec_element_i32(s, tcg_res, rd, pass,
13143 is_scalar ? size : MO_32);
13144 if (size == 1) {
13145 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13146 tcg_op, tcg_idx, tcg_res);
13147 } else {
13148 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13149 tcg_op, tcg_idx, tcg_res);
13150 }
13151 break;
13152 default:
13153 g_assert_not_reached();
13154 }
13155
13156 if (is_scalar) {
13157 write_fp_sreg(s, rd, tcg_res);
13158 } else {
13159 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13160 }
13161
13162 tcg_temp_free_i32(tcg_op);
13163 tcg_temp_free_i32(tcg_res);
13164 }
13165
13166 tcg_temp_free_i32(tcg_idx);
13167 clear_vec_high(s, is_q, rd);
13168 } else {
13169 /* long ops: 16x16->32 or 32x32->64 */
13170 TCGv_i64 tcg_res[2];
13171 int pass;
13172 bool satop = extract32(opcode, 0, 1);
13173 MemOp memop = MO_32;
13174
13175 if (satop || !u) {
13176 memop |= MO_SIGN;
13177 }
13178
13179 if (size == 2) {
13180 TCGv_i64 tcg_idx = tcg_temp_new_i64();
13181
13182 read_vec_element(s, tcg_idx, rm, index, memop);
13183
13184 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13185 TCGv_i64 tcg_op = tcg_temp_new_i64();
13186 TCGv_i64 tcg_passres;
13187 int passelt;
13188
13189 if (is_scalar) {
13190 passelt = 0;
13191 } else {
13192 passelt = pass + (is_q * 2);
13193 }
13194
13195 read_vec_element(s, tcg_op, rn, passelt, memop);
13196
13197 tcg_res[pass] = tcg_temp_new_i64();
13198
13199 if (opcode == 0xa || opcode == 0xb) {
13200 /* Non-accumulating ops */
13201 tcg_passres = tcg_res[pass];
13202 } else {
13203 tcg_passres = tcg_temp_new_i64();
13204 }
13205
13206 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13207 tcg_temp_free_i64(tcg_op);
13208
13209 if (satop) {
13210 /* saturating, doubling */
13211 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13212 tcg_passres, tcg_passres);
13213 }
13214
13215 if (opcode == 0xa || opcode == 0xb) {
13216 continue;
13217 }
13218
13219 /* Accumulating op: handle accumulate step */
13220 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13221
13222 switch (opcode) {
13223 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13224 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13225 break;
13226 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13227 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13228 break;
13229 case 0x7: /* SQDMLSL, SQDMLSL2 */
13230 tcg_gen_neg_i64(tcg_passres, tcg_passres);
13231 /* fall through */
13232 case 0x3: /* SQDMLAL, SQDMLAL2 */
13233 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13234 tcg_res[pass],
13235 tcg_passres);
13236 break;
13237 default:
13238 g_assert_not_reached();
13239 }
13240 tcg_temp_free_i64(tcg_passres);
13241 }
13242 tcg_temp_free_i64(tcg_idx);
13243
13244 clear_vec_high(s, !is_scalar, rd);
13245 } else {
13246 TCGv_i32 tcg_idx = tcg_temp_new_i32();
13247
13248 assert(size == 1);
13249 read_vec_element_i32(s, tcg_idx, rm, index, size);
13250
13251 if (!is_scalar) {
13252 /* The simplest way to handle the 16x16 indexed ops is to
13253 * duplicate the index into both halves of the 32 bit tcg_idx
13254 * and then use the usual Neon helpers.
13255 */
13256 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13257 }
13258
13259 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13260 TCGv_i32 tcg_op = tcg_temp_new_i32();
13261 TCGv_i64 tcg_passres;
13262
13263 if (is_scalar) {
13264 read_vec_element_i32(s, tcg_op, rn, pass, size);
13265 } else {
13266 read_vec_element_i32(s, tcg_op, rn,
13267 pass + (is_q * 2), MO_32);
13268 }
13269
13270 tcg_res[pass] = tcg_temp_new_i64();
13271
13272 if (opcode == 0xa || opcode == 0xb) {
13273 /* Non-accumulating ops */
13274 tcg_passres = tcg_res[pass];
13275 } else {
13276 tcg_passres = tcg_temp_new_i64();
13277 }
13278
13279 if (memop & MO_SIGN) {
13280 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13281 } else {
13282 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13283 }
13284 if (satop) {
13285 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13286 tcg_passres, tcg_passres);
13287 }
13288 tcg_temp_free_i32(tcg_op);
13289
13290 if (opcode == 0xa || opcode == 0xb) {
13291 continue;
13292 }
13293
13294 /* Accumulating op: handle accumulate step */
13295 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13296
13297 switch (opcode) {
13298 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13299 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13300 tcg_passres);
13301 break;
13302 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13303 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13304 tcg_passres);
13305 break;
13306 case 0x7: /* SQDMLSL, SQDMLSL2 */
13307 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13308 /* fall through */
13309 case 0x3: /* SQDMLAL, SQDMLAL2 */
13310 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13311 tcg_res[pass],
13312 tcg_passres);
13313 break;
13314 default:
13315 g_assert_not_reached();
13316 }
13317 tcg_temp_free_i64(tcg_passres);
13318 }
13319 tcg_temp_free_i32(tcg_idx);
13320
13321 if (is_scalar) {
13322 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13323 }
13324 }
13325
13326 if (is_scalar) {
13327 tcg_res[1] = tcg_const_i64(0);
13328 }
13329
13330 for (pass = 0; pass < 2; pass++) {
13331 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13332 tcg_temp_free_i64(tcg_res[pass]);
13333 }
13334 }
13335
13336 if (fpst) {
13337 tcg_temp_free_ptr(fpst);
13338 }
13339 }
13340
13341 /* Crypto AES
13342 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13343 * +-----------------+------+-----------+--------+-----+------+------+
13344 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13345 * +-----------------+------+-----------+--------+-----+------+------+
13346 */
13347 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13348 {
13349 int size = extract32(insn, 22, 2);
13350 int opcode = extract32(insn, 12, 5);
13351 int rn = extract32(insn, 5, 5);
13352 int rd = extract32(insn, 0, 5);
13353 int decrypt;
13354 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13355 TCGv_i32 tcg_decrypt;
13356 CryptoThreeOpIntFn *genfn;
13357
13358 if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13359 unallocated_encoding(s);
13360 return;
13361 }
13362
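/* AESD and AESIMC reuse the AESE and AESMC helpers; the decrypt flag
 * selects the direction of the operation.
 */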
13363 switch (opcode) {
13364 case 0x4: /* AESE */
13365 decrypt = 0;
13366 genfn = gen_helper_crypto_aese;
13367 break;
13368 case 0x6: /* AESMC */
13369 decrypt = 0;
13370 genfn = gen_helper_crypto_aesmc;
13371 break;
13372 case 0x5: /* AESD */
13373 decrypt = 1;
13374 genfn = gen_helper_crypto_aese;
13375 break;
13376 case 0x7: /* AESIMC */
13377 decrypt = 1;
13378 genfn = gen_helper_crypto_aesmc;
13379 break;
13380 default:
13381 unallocated_encoding(s);
13382 return;
13383 }
13384
13385 if (!fp_access_check(s)) {
13386 return;
13387 }
13388
13389 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13390 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13391 tcg_decrypt = tcg_const_i32(decrypt);
13392
13393 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13394
13395 tcg_temp_free_ptr(tcg_rd_ptr);
13396 tcg_temp_free_ptr(tcg_rn_ptr);
13397 tcg_temp_free_i32(tcg_decrypt);
13398 }
13399
13400 /* Crypto three-reg SHA
13401 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
13402 * +-----------------+------+---+------+---+--------+-----+------+------+
13403 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
13404 * +-----------------+------+---+------+---+--------+-----+------+------+
13405 */
13406 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13407 {
13408 int size = extract32(insn, 22, 2);
13409 int opcode = extract32(insn, 12, 3);
13410 int rm = extract32(insn, 16, 5);
13411 int rn = extract32(insn, 5, 5);
13412 int rd = extract32(insn, 0, 5);
13413 CryptoThreeOpFn *genfn;
13414 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13415 bool feature;
13416
13417 if (size != 0) {
13418 unallocated_encoding(s);
13419 return;
13420 }
13421
13422 switch (opcode) {
13423 case 0: /* SHA1C */
13424 case 1: /* SHA1P */
13425 case 2: /* SHA1M */
13426 case 3: /* SHA1SU0 */
13427 genfn = NULL;
13428 feature = dc_isar_feature(aa64_sha1, s);
13429 break;
13430 case 4: /* SHA256H */
13431 genfn = gen_helper_crypto_sha256h;
13432 feature = dc_isar_feature(aa64_sha256, s);
13433 break;
13434 case 5: /* SHA256H2 */
13435 genfn = gen_helper_crypto_sha256h2;
13436 feature = dc_isar_feature(aa64_sha256, s);
13437 break;
13438 case 6: /* SHA256SU1 */
13439 genfn = gen_helper_crypto_sha256su1;
13440 feature = dc_isar_feature(aa64_sha256, s);
13441 break;
13442 default:
13443 unallocated_encoding(s);
13444 return;
13445 }
13446
13447 if (!feature) {
13448 unallocated_encoding(s);
13449 return;
13450 }
13451
13452 if (!fp_access_check(s)) {
13453 return;
13454 }
13455
13456 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13457 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13458 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13459
13460 if (genfn) {
13461 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13462 } else {
13463 TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13464
13465 gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13466 tcg_rm_ptr, tcg_opcode);
13467 tcg_temp_free_i32(tcg_opcode);
13468 }
13469
13470 tcg_temp_free_ptr(tcg_rd_ptr);
13471 tcg_temp_free_ptr(tcg_rn_ptr);
13472 tcg_temp_free_ptr(tcg_rm_ptr);
13473 }
13474
13475 /* Crypto two-reg SHA
13476 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13477 * +-----------------+------+-----------+--------+-----+------+------+
13478 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13479 * +-----------------+------+-----------+--------+-----+------+------+
13480 */
13481 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13482 {
13483 int size = extract32(insn, 22, 2);
13484 int opcode = extract32(insn, 12, 5);
13485 int rn = extract32(insn, 5, 5);
13486 int rd = extract32(insn, 0, 5);
13487 CryptoTwoOpFn *genfn;
13488 bool feature;
13489 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13490
13491 if (size != 0) {
13492 unallocated_encoding(s);
13493 return;
13494 }
13495
13496 switch (opcode) {
13497 case 0: /* SHA1H */
13498 feature = dc_isar_feature(aa64_sha1, s);
13499 genfn = gen_helper_crypto_sha1h;
13500 break;
13501 case 1: /* SHA1SU1 */
13502 feature = dc_isar_feature(aa64_sha1, s);
13503 genfn = gen_helper_crypto_sha1su1;
13504 break;
13505 case 2: /* SHA256SU0 */
13506 feature = dc_isar_feature(aa64_sha256, s);
13507 genfn = gen_helper_crypto_sha256su0;
13508 break;
13509 default:
13510 unallocated_encoding(s);
13511 return;
13512 }
13513
13514 if (!feature) {
13515 unallocated_encoding(s);
13516 return;
13517 }
13518
13519 if (!fp_access_check(s)) {
13520 return;
13521 }
13522
13523 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13524 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13525
13526 genfn(tcg_rd_ptr, tcg_rn_ptr);
13527
13528 tcg_temp_free_ptr(tcg_rd_ptr);
13529 tcg_temp_free_ptr(tcg_rn_ptr);
13530 }
13531
13532 /* Crypto three-reg SHA512
13533 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13534 * +-----------------------+------+---+---+-----+--------+------+------+
13535 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
13536 * +-----------------------+------+---+---+-----+--------+------+------+
13537 */
13538 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13539 {
13540 int opcode = extract32(insn, 10, 2);
13541 int o = extract32(insn, 14, 1);
13542 int rm = extract32(insn, 16, 5);
13543 int rn = extract32(insn, 5, 5);
13544 int rd = extract32(insn, 0, 5);
13545 bool feature;
13546 CryptoThreeOpFn *genfn;
13547
13548 if (o == 0) {
13549 switch (opcode) {
13550 case 0: /* SHA512H */
13551 feature = dc_isar_feature(aa64_sha512, s);
13552 genfn = gen_helper_crypto_sha512h;
13553 break;
13554 case 1: /* SHA512H2 */
13555 feature = dc_isar_feature(aa64_sha512, s);
13556 genfn = gen_helper_crypto_sha512h2;
13557 break;
13558 case 2: /* SHA512SU1 */
13559 feature = dc_isar_feature(aa64_sha512, s);
13560 genfn = gen_helper_crypto_sha512su1;
13561 break;
13562 case 3: /* RAX1 */
13563 feature = dc_isar_feature(aa64_sha3, s);
13564 genfn = NULL;
13565 break;
13566 }
13567 } else {
13568 switch (opcode) {
13569 case 0: /* SM3PARTW1 */
13570 feature = dc_isar_feature(aa64_sm3, s);
13571 genfn = gen_helper_crypto_sm3partw1;
13572 break;
13573 case 1: /* SM3PARTW2 */
13574 feature = dc_isar_feature(aa64_sm3, s);
13575 genfn = gen_helper_crypto_sm3partw2;
13576 break;
13577 case 2: /* SM4EKEY */
13578 feature = dc_isar_feature(aa64_sm4, s);
13579 genfn = gen_helper_crypto_sm4ekey;
13580 break;
13581 default:
13582 unallocated_encoding(s);
13583 return;
13584 }
13585 }
13586
13587 if (!feature) {
13588 unallocated_encoding(s);
13589 return;
13590 }
13591
13592 if (!fp_access_check(s)) {
13593 return;
13594 }
13595
13596 if (genfn) {
13597 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13598
13599 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13600 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13601 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13602
13603 genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13604
13605 tcg_temp_free_ptr(tcg_rd_ptr);
13606 tcg_temp_free_ptr(tcg_rn_ptr);
13607 tcg_temp_free_ptr(tcg_rm_ptr);
13608 } else {
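/* RAX1 has no helper: compute Vd = Vn ^ rol64(Vm, 1) inline,
 * per 64-bit lane.
 */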
13609 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13610 int pass;
13611
13612 tcg_op1 = tcg_temp_new_i64();
13613 tcg_op2 = tcg_temp_new_i64();
13614 tcg_res[0] = tcg_temp_new_i64();
13615 tcg_res[1] = tcg_temp_new_i64();
13616
13617 for (pass = 0; pass < 2; pass++) {
13618 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13619 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13620
13621 tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13622 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13623 }
13624 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13625 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13626
13627 tcg_temp_free_i64(tcg_op1);
13628 tcg_temp_free_i64(tcg_op2);
13629 tcg_temp_free_i64(tcg_res[0]);
13630 tcg_temp_free_i64(tcg_res[1]);
13631 }
13632 }
13633
13634 /* Crypto two-reg SHA512
13635 * 31 12 11 10 9 5 4 0
13636 * +-----------------------------------------+--------+------+------+
13637 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
13638 * +-----------------------------------------+--------+------+------+
13639 */
13640 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13641 {
13642 int opcode = extract32(insn, 10, 2);
13643 int rn = extract32(insn, 5, 5);
13644 int rd = extract32(insn, 0, 5);
13645 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13646 bool feature;
13647 CryptoTwoOpFn *genfn;
13648
13649 switch (opcode) {
13650 case 0: /* SHA512SU0 */
13651 feature = dc_isar_feature(aa64_sha512, s);
13652 genfn = gen_helper_crypto_sha512su0;
13653 break;
13654 case 1: /* SM4E */
13655 feature = dc_isar_feature(aa64_sm4, s);
13656 genfn = gen_helper_crypto_sm4e;
13657 break;
13658 default:
13659 unallocated_encoding(s);
13660 return;
13661 }
13662
13663 if (!feature) {
13664 unallocated_encoding(s);
13665 return;
13666 }
13667
13668 if (!fp_access_check(s)) {
13669 return;
13670 }
13671
13672 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13673 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13674
13675 genfn(tcg_rd_ptr, tcg_rn_ptr);
13676
13677 tcg_temp_free_ptr(tcg_rd_ptr);
13678 tcg_temp_free_ptr(tcg_rn_ptr);
13679 }
13680
13681 /* Crypto four-register
13682 * 31 23 22 21 20 16 15 14 10 9 5 4 0
13683 * +-------------------+-----+------+---+------+------+------+
13684 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
13685 * +-------------------+-----+------+---+------+------+------+
13686 */
13687 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13688 {
13689 int op0 = extract32(insn, 21, 2);
13690 int rm = extract32(insn, 16, 5);
13691 int ra = extract32(insn, 10, 5);
13692 int rn = extract32(insn, 5, 5);
13693 int rd = extract32(insn, 0, 5);
13694 bool feature;
13695
13696 switch (op0) {
13697 case 0: /* EOR3 */
13698 case 1: /* BCAX */
13699 feature = dc_isar_feature(aa64_sha3, s);
13700 break;
13701 case 2: /* SM3SS1 */
13702 feature = dc_isar_feature(aa64_sm3, s);
13703 break;
13704 default:
13705 unallocated_encoding(s);
13706 return;
13707 }
13708
13709 if (!feature) {
13710 unallocated_encoding(s);
13711 return;
13712 }
13713
13714 if (!fp_access_check(s)) {
13715 return;
13716 }
13717
13718 if (op0 < 2) {
13719 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13720 int pass;
13721
13722 tcg_op1 = tcg_temp_new_i64();
13723 tcg_op2 = tcg_temp_new_i64();
13724 tcg_op3 = tcg_temp_new_i64();
13725 tcg_res[0] = tcg_temp_new_i64();
13726 tcg_res[1] = tcg_temp_new_i64();
13727
13728 for (pass = 0; pass < 2; pass++) {
13729 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13730 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13731 read_vec_element(s, tcg_op3, ra, pass, MO_64);
13732
13733 if (op0 == 0) {
13734 /* EOR3 */
13735 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13736 } else {
13737 /* BCAX */
13738 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13739 }
13740 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13741 }
13742 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13743 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13744
13745 tcg_temp_free_i64(tcg_op1);
13746 tcg_temp_free_i64(tcg_op2);
13747 tcg_temp_free_i64(tcg_op3);
13748 tcg_temp_free_i64(tcg_res[0]);
13749 tcg_temp_free_i64(tcg_res[1]);
13750 } else { /* SM3SS1 */
13751 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13752
13753 tcg_op1 = tcg_temp_new_i32();
13754 tcg_op2 = tcg_temp_new_i32();
13755 tcg_op3 = tcg_temp_new_i32();
13756 tcg_res = tcg_temp_new_i32();
13757 tcg_zero = tcg_const_i32(0);
13758
13759 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13760 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13761 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13762
13763 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13764 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13765 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13766 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13767
13768 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13769 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13770 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13771 write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13772
13773 tcg_temp_free_i32(tcg_op1);
13774 tcg_temp_free_i32(tcg_op2);
13775 tcg_temp_free_i32(tcg_op3);
13776 tcg_temp_free_i32(tcg_res);
13777 tcg_temp_free_i32(tcg_zero);
13778 }
13779 }
13780
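/*
 * Reference sketches for the three ops above, as plain C (editorial
 * illustrations only; the *_ref names are hypothetical and unused).
 * EOR3 is a three-way XOR, BCAX clears Vm by Va before XOR-ing with
 * Vn, and SM3SS1 combines the top 32-bit elements with two left
 * rotations and modular additions.
 */
static inline uint64_t eor3_ref(uint64_t n, uint64_t m, uint64_t a)
{
    return n ^ m ^ a;
}

static inline uint64_t bcax_ref(uint64_t n, uint64_t m, uint64_t a)
{
    return n ^ (m & ~a);
}

static inline uint32_t sm3ss1_ref(uint32_t n, uint32_t m, uint32_t a)
{
    uint32_t t = ((n << 12) | (n >> 20)) + m + a;  /* ROL(n, 12) + m + a */

    return (t << 7) | (t >> 25);                   /* ROL(t, 7) */
}
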
13781 /* Crypto XAR
13782 * 31 21 20 16 15 10 9 5 4 0
13783 * +-----------------------+------+--------+------+------+
13784 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
13785 * +-----------------------+------+--------+------+------+
13786 */
13787 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13788 {
13789 int rm = extract32(insn, 16, 5);
13790 int imm6 = extract32(insn, 10, 6);
13791 int rn = extract32(insn, 5, 5);
13792 int rd = extract32(insn, 0, 5);
13793 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13794 int pass;
13795
13796 if (!dc_isar_feature(aa64_sha3, s)) {
13797 unallocated_encoding(s);
13798 return;
13799 }
13800
13801 if (!fp_access_check(s)) {
13802 return;
13803 }
13804
13805 tcg_op1 = tcg_temp_new_i64();
13806 tcg_op2 = tcg_temp_new_i64();
13807 tcg_res[0] = tcg_temp_new_i64();
13808 tcg_res[1] = tcg_temp_new_i64();
13809
13810 for (pass = 0; pass < 2; pass++) {
13811 read_vec_element(s, tcg_op1, rn, pass, MO_64);
13812 read_vec_element(s, tcg_op2, rm, pass, MO_64);
13813
13814 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13815 tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13816 }
13817 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13818 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13819
13820 tcg_temp_free_i64(tcg_op1);
13821 tcg_temp_free_i64(tcg_op2);
13822 tcg_temp_free_i64(tcg_res[0]);
13823 tcg_temp_free_i64(tcg_res[1]);
13824 }
13825
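/*
 * Reference sketch of the per-lane XAR semantics implemented above
 * (editorial illustration; xar_ref is a hypothetical, unused name):
 * XOR the 64-bit lanes of Vn and Vm, then rotate right by imm6. The
 * imm6 == 0 guard avoids a shift by 64, which plain C leaves undefined.
 */
static inline uint64_t xar_ref(uint64_t n, uint64_t m, unsigned imm6)
{
    uint64_t t = n ^ m;

    imm6 &= 63;
    return imm6 ? (t >> imm6) | (t << (64 - imm6)) : t;
}
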
13826 /* Crypto three-reg imm2
13827 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13828 * +-----------------------+------+-----+------+--------+------+------+
13829 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
13830 * +-----------------------+------+-----+------+--------+------+------+
13831 */
13832 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13833 {
13834 int opcode = extract32(insn, 10, 2);
13835 int imm2 = extract32(insn, 12, 2);
13836 int rm = extract32(insn, 16, 5);
13837 int rn = extract32(insn, 5, 5);
13838 int rd = extract32(insn, 0, 5);
13839 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13840 TCGv_i32 tcg_imm2, tcg_opcode;
13841
13842 if (!dc_isar_feature(aa64_sm3, s)) {
13843 unallocated_encoding(s);
13844 return;
13845 }
13846
13847 if (!fp_access_check(s)) {
13848 return;
13849 }
13850
13851 tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13852 tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13853 tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13854 tcg_imm2 = tcg_const_i32(imm2);
13855 tcg_opcode = tcg_const_i32(opcode);
13856
13857 gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13858 tcg_opcode);
13859
13860 tcg_temp_free_ptr(tcg_rd_ptr);
13861 tcg_temp_free_ptr(tcg_rn_ptr);
13862 tcg_temp_free_ptr(tcg_rm_ptr);
13863 tcg_temp_free_i32(tcg_imm2);
13864 tcg_temp_free_i32(tcg_opcode);
13865 }
13866
13867 /* C3.6 Data processing - SIMD, inc Crypto
13868 *
13869 * As the decode gets a little complex, we use a table-based
13870 * approach for this part of the decode.
13871 */
13872 static const AArch64DecodeTable data_proc_simd[] = {
13873 /* pattern , mask , fn */
13874 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13875 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13876 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13877 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13878 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13879 { 0x0e000400, 0x9fe08400, disas_simd_copy },
13880 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13881 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it;
13881 see the worked example after this table */
13882 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13883 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13884 { 0x0e000000, 0xbf208c00, disas_simd_tb },
13885 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13886 { 0x2e000000, 0xbf208400, disas_simd_ext },
13887 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13888 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13889 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13890 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13891 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13892 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13893 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13894 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13895 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13896 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13897 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13898 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13899 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13900 { 0xce000000, 0xff808000, disas_crypto_four_reg },
13901 { 0xce800000, 0xffe00000, disas_crypto_xar },
13902 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13903 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13904 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13905 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13906 { 0x00000000, 0x00000000, NULL }
13907 };
13908
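/*
 * Worked example of the ordering note above: an insn matching the
 * simd_mod_imm entry, (insn & 0x9ff80400) == 0x0f000400, also satisfies
 * the simd_shift_imm entry, (insn & 0x9f800400) == 0x0f000400, because
 * the shift_imm mask bits are a subset of the mod_imm mask bits and the
 * two patterns agree on them. The first matching entry wins, so the
 * mod_imm entry must come first.
 */
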
13909 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13910 {
13911 /* Note that this is called with all non-FP cases from
13912 * table C3-6, so it must UNDEF for entries not specifically
13913 * allocated to instructions in that table.
13914 */
13915 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13916 if (fn) {
13917 fn(s, insn);
13918 } else {
13919 unallocated_encoding(s);
13920 }
13921 }
13922
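/*
 * A minimal sketch of the pattern/mask walk that lookup_disas_fn (used
 * above) performs over such a table, assuming the conventional
 * first-match scan; lookup_ref is a hypothetical, unused name.
 */
static inline AArch64DecodeFn *lookup_ref(const AArch64DecodeTable *tptr,
                                          uint32_t insn)
{
    /* The all-zero { 0, 0, NULL } sentinel entry terminates the scan. */
    for (; tptr->mask || tptr->pattern; tptr++) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
    }
    return NULL;
}
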
13923 /* C3.6 Data processing - SIMD and floating point */
13924 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13925 {
13926 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13927 disas_data_proc_fp(s, insn);
13928 } else {
13929 /* SIMD, including crypto */
13930 disas_data_proc_simd(s, insn);
13931 }
13932 }
13933
13934 /**
13935 * is_guarded_page:
13936 * @env: The cpu environment
13937 * @s: The DisasContext
13938 *
13939 * Return true if the page is guarded.
13940 */
13941 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13942 {
13943 #ifdef CONFIG_USER_ONLY
13944 return false; /* FIXME */
13945 #else
13946 uint64_t addr = s->base.pc_first;
13947 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13948 unsigned int index = tlb_index(env, mmu_idx, addr);
13949 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
13950
13951 /*
13952 * We test this immediately after reading an insn, which means
13953 * that any normal page must be in the TLB. The only exception
13954 * would be for executing from flash or device memory, which
13955 * does not retain the TLB entry.
13956 *
13957 * FIXME: Assume false for those, for now. We could use
13958 * arm_cpu_get_phys_page_attrs_debug to re-read the page
13959 * table entry even for that case.
13960 */
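    /*
     * attrs.target_tlb_bit0 is the target-defined TLB attribute bit in
     * which, on Arm, the page-table GP (guarded page) bit is cached
     * when the TLB entry is filled.
     */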
13961 return (tlb_hit(entry->addr_code, addr) &&
13962 env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
13963 #endif
13964 }
13965
13966 /**
13967 * btype_destination_ok:
13968 * @insn: The instruction at the branch destination
13969 * @bt: SCTLR_ELx.BT
13970 * @btype: PSTATE.BTYPE, which is known to be non-zero here
13971 *
13972 * On a guarded page, there are a limited number of insns
13973 * that may be present at the branch target:
13974 * - branch target identifiers,
13975 * - PACIASP, PACIBSP,
13976 * - the BRK insn,
13977 * - the HLT insn.
13978 * Anything else causes a Branch Target Exception.
13979 *
13980 * Return true if the branch is compatible, false to raise BTITRAP.
13981 */
13982 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
13983 {
13984 if ((insn & 0xfffff01fu) == 0xd503201fu) {
13985 /* HINT space */
13986 switch (extract32(insn, 5, 7)) {
13987 case 0b011001: /* PACIASP */
13988 case 0b011011: /* PACIBSP */
13989 /*
13990 * If SCTLR_ELx.BT, then PACI*SP are not compatible
13991 * with btype == 3. Otherwise all btype are ok.
13992 */
13993 return !bt || btype != 3;
13994 case 0b100000: /* BTI */
13995 /* Not compatible with any btype. */
13996 return false;
13997 case 0b100010: /* BTI c */
13998 /* Not compatible with btype == 3 */
13999 return btype != 3;
14000 case 0b100100: /* BTI j */
14001 /* Not compatible with btype == 2 */
14002 return btype != 2;
14003 case 0b100110: /* BTI jc */
14004 /* Compatible with any btype. */
14005 return true;
14006 }
14007 } else {
14008 switch (insn & 0xffe0001fu) {
14009 case 0xd4200000u: /* BRK */
14010 case 0xd4400000u: /* HLT */
14011 /* Give priority to the breakpoint exception. */
14012 return true;
14013 }
14014 }
14015 return false;
14016 }
14017
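/*
 * Worked example for the HINT-space test above: BTI c encodes as
 * 0xd503245f, so (insn & 0xfffff01f) == 0xd503201f holds and
 * extract32(insn, 5, 7) yields 0b0100010, selecting the "BTI c" case.
 */
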
14018 /* C3.1 A64 instruction index by encoding */
14019 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14020 {
14021 uint32_t insn;
14022
14023 s->pc_curr = s->base.pc_next;
14024 insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
14025 s->insn = insn;
14026 s->base.pc_next += 4;
14027
14028 s->fp_access_checked = false;
14029
14030 if (dc_isar_feature(aa64_bti, s)) {
14031 if (s->base.num_insns == 1) {
14032 /*
14033 * At the first insn of the TB, compute s->guarded_page.
14034 * We delayed computing this until successfully reading
14035 * the first insn of the TB, above. This (mostly) ensures
14036 * that the softmmu tlb entry has been populated, and the
14037 * page table GP bit is available.
14038 *
14039 * Note that we need to compute this even if btype == 0,
14040 * because this value is used for BR instructions later
14041 * where ENV is not available.
14042 */
14043 s->guarded_page = is_guarded_page(env, s);
14044
14045 /* First insn can have btype set to non-zero. */
14046 tcg_debug_assert(s->btype >= 0);
14047
14048 /*
14049 * Note that the Branch Target Exception has fairly high
14050 * priority -- below debugging exceptions but above most
14051 * everything else. This allows us to handle this now
14052 * instead of waiting until the insn is otherwise decoded.
14053 */
14054 if (s->btype != 0
14055 && s->guarded_page
14056 && !btype_destination_ok(insn, s->bt, s->btype)) {
14057 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
14058 syn_btitrap(s->btype),
14059 default_exception_el(s));
14060 return;
14061 }
14062 } else {
14063 /* Not the first insn: btype must be 0. */
14064 tcg_debug_assert(s->btype == 0);
14065 }
14066 }
14067
14068 switch (extract32(insn, 25, 4)) {
14069 case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
14070 unallocated_encoding(s);
14071 break;
14072 case 0x2:
14073 if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
14074 unallocated_encoding(s);
14075 }
14076 break;
14077 case 0x8: case 0x9: /* Data processing - immediate */
14078 disas_data_proc_imm(s, insn);
14079 break;
14080 case 0xa: case 0xb: /* Branch, exception generation and system insns */
14081 disas_b_exc_sys(s, insn);
14082 break;
14083 case 0x4:
14084 case 0x6:
14085 case 0xc:
14086 case 0xe: /* Loads and stores */
14087 disas_ldst(s, insn);
14088 break;
14089 case 0x5:
14090 case 0xd: /* Data processing - register */
14091 disas_data_proc_reg(s, insn);
14092 break;
14093 case 0x7:
14094 case 0xf: /* Data processing - SIMD and floating point */
14095 disas_data_proc_simd_fp(s, insn);
14096 break;
14097 default:
14098 assert(FALSE); /* all 16 cases should be handled above */
14099 break;
14100 }
14101
14102 /* if we allocated any temporaries, free them here */
14103 free_tmp_a64(s);
14104
14105 /*
14106 * After execution of most insns, btype is reset to 0.
14107 * Note that we set btype == -1 when the insn sets btype.
14108 */
14109 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14110 reset_btype(s);
14111 }
14112 }
14113
14114 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14115 CPUState *cpu)
14116 {
14117 DisasContext *dc = container_of(dcbase, DisasContext, base);
14118 CPUARMState *env = cpu->env_ptr;
14119 ARMCPU *arm_cpu = env_archcpu(env);
14120 uint32_t tb_flags = dc->base.tb->flags;
14121 int bound, core_mmu_idx;
14122
14123 dc->isar = &arm_cpu->isar;
14124 dc->condjmp = 0;
14125
14126 dc->aarch64 = 1;
14127 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
14128 * there is no secure EL1, so we route exceptions to EL3.
14129 */
14130 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
14131 !arm_el_is_aa64(env, 3);
14132 dc->thumb = 0;
14133 dc->sctlr_b = 0;
14134 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
14135 dc->condexec_mask = 0;
14136 dc->condexec_cond = 0;
14137 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
14138 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
14139 dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
14140 dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
14141 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14142 #if !defined(CONFIG_USER_ONLY)
14143 dc->user = (dc->current_el == 0);
14144 #endif
14145 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
14146 dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
14147 dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
14148 dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
14149 dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
14150 dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
14151 dc->vec_len = 0;
14152 dc->vec_stride = 0;
14153 dc->cp_regs = arm_cpu->cp_regs;
14154 dc->features = env->features;
14155
14156 /* Single step state. The code-generation logic here is:
14157 * SS_ACTIVE == 0:
14158 * generate code with no special handling for single-stepping (except
14159 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14160 * this happens anyway because those changes are all system register or
14161 * PSTATE writes).
14162 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14163 * emit code for one insn
14164 * emit code to clear PSTATE.SS
14165 * emit code to generate software step exception for completed step
14166 * end TB (as usual for having generated an exception)
14167 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14168 * emit code to generate a software step exception
14169 * end the TB
14170 */
14171 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
14172 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
14173 dc->is_ldex = false;
14174 dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
14175
14176 /* Bound the number of insns to execute to those left on the page. */
14177 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
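    /*
     * Worked example: with 4 KiB pages, TARGET_PAGE_MASK is -4096, so
     * pc_first | TARGET_PAGE_MASK sets every bit above the page offset
     * and, as a two's-complement value, equals page_offset - 4096.
     * Negating it gives the bytes left on the page: for an offset of
     * 0xff8 that is 8 bytes, i.e. a bound of 2 insns.
     */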
14178
14179 /* If architectural single step active, limit to 1. */
14180 if (dc->ss_active) {
14181 bound = 1;
14182 }
14183 dc->base.max_insns = MIN(dc->base.max_insns, bound);
14184
14185 init_tmp_a64_array(dc);
14186 }
14187
14188 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14189 {
14190 }
14191
14192 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14193 {
14194 DisasContext *dc = container_of(dcbase, DisasContext, base);
14195
14196 tcg_gen_insn_start(dc->base.pc_next, 0, 0);
14197 dc->insn_start = tcg_last_op();
14198 }
14199
14200 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
14201 const CPUBreakpoint *bp)
14202 {
14203 DisasContext *dc = container_of(dcbase, DisasContext, base);
14204
14205 if (bp->flags & BP_CPU) {
14206 gen_a64_set_pc_im(dc->base.pc_next);
14207 gen_helper_check_breakpoints(cpu_env);
14208 /* End the TB early; it likely won't be executed */
14209 dc->base.is_jmp = DISAS_TOO_MANY;
14210 } else {
14211 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
14212 /* The address covered by the breakpoint must be
14213 included in [tb->pc, tb->pc + tb->size) in order
14214 for it to be properly cleared -- thus we
14215 increment the PC here so that the logic setting
14216 tb->size below does the right thing. */
14217 dc->base.pc_next += 4;
14218 dc->base.is_jmp = DISAS_NORETURN;
14219 }
14220
14221 return true;
14222 }
14223
14224 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14225 {
14226 DisasContext *dc = container_of(dcbase, DisasContext, base);
14227 CPUARMState *env = cpu->env_ptr;
14228
14229 if (dc->ss_active && !dc->pstate_ss) {
14230 /* Singlestep state is Active-pending.
14231 * If we're in this state at the start of a TB then either
14232 * a) we just took an exception to an EL which is being debugged
14233 * and this is the first insn in the exception handler
14234 * b) debug exceptions were masked and we just unmasked them
14235 * without changing EL (eg by clearing PSTATE.D)
14236 * In either case we're going to take a swstep exception in the
14237 * "did not step an insn" case, and so the syndrome ISV and EX
14238 * bits should be zero.
14239 */
14240 assert(dc->base.num_insns == 1);
14241 gen_swstep_exception(dc, 0, 0);
14242 dc->base.is_jmp = DISAS_NORETURN;
14243 } else {
14244 disas_a64_insn(env, dc);
14245 }
14246
14247 translator_loop_temp_check(&dc->base);
14248 }
14249
14250 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14251 {
14252 DisasContext *dc = container_of(dcbase, DisasContext, base);
14253
14254 if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
14255 /* Note that this means single-stepping a WFI doesn't halt the CPU.
14256 * For conditional branch insns this is harmless unreachable code as
14257 * gen_goto_tb() has already handled emitting the debug exception
14258 * (and thus a tb-jump is not possible when singlestepping).
14259 */
14260 switch (dc->base.is_jmp) {
14261 default:
14262 gen_a64_set_pc_im(dc->base.pc_next);
14263 /* fall through */
14264 case DISAS_EXIT:
14265 case DISAS_JUMP:
14266 if (dc->base.singlestep_enabled) {
14267 gen_exception_internal(EXCP_DEBUG);
14268 } else {
14269 gen_step_complete_exception(dc);
14270 }
14271 break;
14272 case DISAS_NORETURN:
14273 break;
14274 }
14275 } else {
14276 switch (dc->base.is_jmp) {
14277 case DISAS_NEXT:
14278 case DISAS_TOO_MANY:
14279 gen_goto_tb(dc, 1, dc->base.pc_next);
14280 break;
14281 default:
14282 case DISAS_UPDATE:
14283 gen_a64_set_pc_im(dc->base.pc_next);
14284 /* fall through */
14285 case DISAS_EXIT:
14286 tcg_gen_exit_tb(NULL, 0);
14287 break;
14288 case DISAS_JUMP:
14289 tcg_gen_lookup_and_goto_ptr();
14290 break;
14291 case DISAS_NORETURN:
14292 case DISAS_SWI:
14293 break;
14294 case DISAS_WFE:
14295 gen_a64_set_pc_im(dc->base.pc_next);
14296 gen_helper_wfe(cpu_env);
14297 break;
14298 case DISAS_YIELD:
14299 gen_a64_set_pc_im(dc->base.pc_next);
14300 gen_helper_yield(cpu_env);
14301 break;
14302 case DISAS_WFI:
14303 {
14304 /* This is a special case because we don't want to just halt the CPU
14305 * if trying to debug across a WFI.
14306 */
14307 TCGv_i32 tmp = tcg_const_i32(4); /* A64 insn length in bytes */
14308
14309 gen_a64_set_pc_im(dc->base.pc_next);
14310 gen_helper_wfi(cpu_env, tmp);
14311 tcg_temp_free_i32(tmp);
14312 /* The helper doesn't necessarily throw an exception, but we
14313 * must go back to the main loop to check for interrupts anyway.
14314 */
14315 tcg_gen_exit_tb(NULL, 0);
14316 break;
14317 }
14318 }
14319 }
14320 }
14321
14322 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14323 CPUState *cpu)
14324 {
14325 DisasContext *dc = container_of(dcbase, DisasContext, base);
14326
14327 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
14328 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
14329 }
14330
14331 const TranslatorOps aarch64_translator_ops = {
14332 .init_disas_context = aarch64_tr_init_disas_context,
14333 .tb_start = aarch64_tr_tb_start,
14334 .insn_start = aarch64_tr_insn_start,
14335 .breakpoint_check = aarch64_tr_breakpoint_check,
14336 .translate_insn = aarch64_tr_translate_insn,
14337 .tb_stop = aarch64_tr_tb_stop,
14338 .disas_log = aarch64_tr_disas_log,
14339 };