4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
24 #include "disas/disas.h"
26 #include "semihosting/semihost.h"
29 static TCGv_i64 cpu_X
[32];
30 static TCGv_i64 cpu_pc
;
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high
;
35 static const char *regnames
[] = {
36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
43 A64_SHIFT_TYPE_LSL
= 0,
44 A64_SHIFT_TYPE_LSR
= 1,
45 A64_SHIFT_TYPE_ASR
= 2,
46 A64_SHIFT_TYPE_ROR
= 3
50 * Helpers for extracting complex instruction fields
54 * For load/store with an unsigned 12 bit immediate scaled by the element
55 * size. The input has the immediate field in bits [14:3] and the element
58 static int uimm_scaled(DisasContext
*s
, int x
)
60 unsigned imm
= x
>> 3;
61 unsigned scale
= extract32(x
, 0, 3);
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext
*s
, int x
)
68 return x
<< LOG2_TAG_GRANULE
;
72 * Include the generated decoders.
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
78 /* Table based decoder typedefs - used when the relevant bits for decode
79 * are too awkwardly scattered across the instruction (eg SIMD).
81 typedef void AArch64DecodeFn(DisasContext
*s
, uint32_t insn
);
83 typedef struct AArch64DecodeTable
{
86 AArch64DecodeFn
*disas_fn
;
89 /* initialize TCG globals. */
90 void a64_translate_init(void)
94 cpu_pc
= tcg_global_mem_new_i64(cpu_env
,
95 offsetof(CPUARMState
, pc
),
97 for (i
= 0; i
< 32; i
++) {
98 cpu_X
[i
] = tcg_global_mem_new_i64(cpu_env
,
99 offsetof(CPUARMState
, xregs
[i
]),
103 cpu_exclusive_high
= tcg_global_mem_new_i64(cpu_env
,
104 offsetof(CPUARMState
, exclusive_high
), "exclusive_high");
108 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
110 static int get_a64_user_mem_index(DisasContext
*s
)
113 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
114 * which is the usual mmu_idx for this cpu state.
116 ARMMMUIdx useridx
= s
->mmu_idx
;
120 * We have pre-computed the condition for AccType_UNPRIV.
121 * Therefore we should never get here with a mmu_idx for
122 * which we do not know the corresponding user mmu_idx.
125 case ARMMMUIdx_E10_1
:
126 case ARMMMUIdx_E10_1_PAN
:
127 useridx
= ARMMMUIdx_E10_0
;
129 case ARMMMUIdx_E20_2
:
130 case ARMMMUIdx_E20_2_PAN
:
131 useridx
= ARMMMUIdx_E20_0
;
134 g_assert_not_reached();
137 return arm_to_core_mmu_idx(useridx
);
140 static void set_btype_raw(int val
)
142 tcg_gen_st_i32(tcg_constant_i32(val
), cpu_env
,
143 offsetof(CPUARMState
, btype
));
146 static void set_btype(DisasContext
*s
, int val
)
148 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
149 tcg_debug_assert(val
>= 1 && val
<= 3);
154 static void reset_btype(DisasContext
*s
)
162 static void gen_pc_plus_diff(DisasContext
*s
, TCGv_i64 dest
, target_long diff
)
164 assert(s
->pc_save
!= -1);
165 if (tb_cflags(s
->base
.tb
) & CF_PCREL
) {
166 tcg_gen_addi_i64(dest
, cpu_pc
, (s
->pc_curr
- s
->pc_save
) + diff
);
168 tcg_gen_movi_i64(dest
, s
->pc_curr
+ diff
);
172 void gen_a64_update_pc(DisasContext
*s
, target_long diff
)
174 gen_pc_plus_diff(s
, cpu_pc
, diff
);
175 s
->pc_save
= s
->pc_curr
+ diff
;
179 * Handle Top Byte Ignore (TBI) bits.
181 * If address tagging is enabled via the TCR TBI bits:
182 * + for EL2 and EL3 there is only one TBI bit, and if it is set
183 * then the address is zero-extended, clearing bits [63:56]
184 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
185 * and TBI1 controls addresses with bit 55 == 1.
186 * If the appropriate TBI bit is set for the address then
187 * the address is sign-extended from bit 55 into bits [63:56]
189 * Here We have concatenated TBI{1,0} into tbi.
191 static void gen_top_byte_ignore(DisasContext
*s
, TCGv_i64 dst
,
192 TCGv_i64 src
, int tbi
)
195 /* Load unmodified address */
196 tcg_gen_mov_i64(dst
, src
);
197 } else if (!regime_has_2_ranges(s
->mmu_idx
)) {
198 /* Force tag byte to all zero */
199 tcg_gen_extract_i64(dst
, src
, 0, 56);
201 /* Sign-extend from bit 55. */
202 tcg_gen_sextract_i64(dst
, src
, 0, 56);
206 /* tbi0 but !tbi1: only use the extension if positive */
207 tcg_gen_and_i64(dst
, dst
, src
);
210 /* !tbi0 but tbi1: only use the extension if negative */
211 tcg_gen_or_i64(dst
, dst
, src
);
214 /* tbi0 and tbi1: always use the extension */
217 g_assert_not_reached();
222 static void gen_a64_set_pc(DisasContext
*s
, TCGv_i64 src
)
225 * If address tagging is enabled for instructions via the TCR TBI bits,
226 * then loading an address into the PC will clear out any tag.
228 gen_top_byte_ignore(s
, cpu_pc
, src
, s
->tbii
);
233 * Handle MTE and/or TBI.
235 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
236 * for the tag to be present in the FAR_ELx register. But for user-only
237 * mode we do not have a TLB with which to implement this, so we must
238 * remove the top byte now.
240 * Always return a fresh temporary that we can increment independently
241 * of the write-back address.
244 TCGv_i64
clean_data_tbi(DisasContext
*s
, TCGv_i64 addr
)
246 TCGv_i64 clean
= tcg_temp_new_i64();
247 #ifdef CONFIG_USER_ONLY
248 gen_top_byte_ignore(s
, clean
, addr
, s
->tbid
);
250 tcg_gen_mov_i64(clean
, addr
);
255 /* Insert a zero tag into src, with the result at dst. */
256 static void gen_address_with_allocation_tag0(TCGv_i64 dst
, TCGv_i64 src
)
258 tcg_gen_andi_i64(dst
, src
, ~MAKE_64BIT_MASK(56, 4));
261 static void gen_probe_access(DisasContext
*s
, TCGv_i64 ptr
,
262 MMUAccessType acc
, int log2_size
)
264 gen_helper_probe_access(cpu_env
, ptr
,
265 tcg_constant_i32(acc
),
266 tcg_constant_i32(get_mem_index(s
)),
267 tcg_constant_i32(1 << log2_size
));
271 * For MTE, check a single logical or atomic access. This probes a single
272 * address, the exact one specified. The size and alignment of the access
273 * is not relevant to MTE, per se, but watchpoints do require the size,
274 * and we want to recognize those before making any other changes to state.
276 static TCGv_i64
gen_mte_check1_mmuidx(DisasContext
*s
, TCGv_i64 addr
,
277 bool is_write
, bool tag_checked
,
278 MemOp memop
, bool is_unpriv
,
281 if (tag_checked
&& s
->mte_active
[is_unpriv
]) {
285 desc
= FIELD_DP32(desc
, MTEDESC
, MIDX
, core_idx
);
286 desc
= FIELD_DP32(desc
, MTEDESC
, TBI
, s
->tbid
);
287 desc
= FIELD_DP32(desc
, MTEDESC
, TCMA
, s
->tcma
);
288 desc
= FIELD_DP32(desc
, MTEDESC
, WRITE
, is_write
);
289 desc
= FIELD_DP32(desc
, MTEDESC
, ALIGN
, get_alignment_bits(memop
));
290 desc
= FIELD_DP32(desc
, MTEDESC
, SIZEM1
, memop_size(memop
) - 1);
292 ret
= tcg_temp_new_i64();
293 gen_helper_mte_check(ret
, cpu_env
, tcg_constant_i32(desc
), addr
);
297 return clean_data_tbi(s
, addr
);
300 TCGv_i64
gen_mte_check1(DisasContext
*s
, TCGv_i64 addr
, bool is_write
,
301 bool tag_checked
, MemOp memop
)
303 return gen_mte_check1_mmuidx(s
, addr
, is_write
, tag_checked
, memop
,
304 false, get_mem_index(s
));
308 * For MTE, check multiple logical sequential accesses.
310 TCGv_i64
gen_mte_checkN(DisasContext
*s
, TCGv_i64 addr
, bool is_write
,
311 bool tag_checked
, int total_size
, MemOp single_mop
)
313 if (tag_checked
&& s
->mte_active
[0]) {
317 desc
= FIELD_DP32(desc
, MTEDESC
, MIDX
, get_mem_index(s
));
318 desc
= FIELD_DP32(desc
, MTEDESC
, TBI
, s
->tbid
);
319 desc
= FIELD_DP32(desc
, MTEDESC
, TCMA
, s
->tcma
);
320 desc
= FIELD_DP32(desc
, MTEDESC
, WRITE
, is_write
);
321 desc
= FIELD_DP32(desc
, MTEDESC
, ALIGN
, get_alignment_bits(single_mop
));
322 desc
= FIELD_DP32(desc
, MTEDESC
, SIZEM1
, total_size
- 1);
324 ret
= tcg_temp_new_i64();
325 gen_helper_mte_check(ret
, cpu_env
, tcg_constant_i32(desc
), addr
);
329 return clean_data_tbi(s
, addr
);
333 * Generate the special alignment check that applies to AccType_ATOMIC
334 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
335 * naturally aligned, but it must not cross a 16-byte boundary.
336 * See AArch64.CheckAlignment().
338 static void check_lse2_align(DisasContext
*s
, int rn
, int imm
,
339 bool is_write
, MemOp mop
)
343 TCGLabel
*over_label
;
347 tmp
= tcg_temp_new_i32();
348 tcg_gen_extrl_i64_i32(tmp
, cpu_reg_sp(s
, rn
));
349 tcg_gen_addi_i32(tmp
, tmp
, imm
& 15);
350 tcg_gen_andi_i32(tmp
, tmp
, 15);
351 tcg_gen_addi_i32(tmp
, tmp
, memop_size(mop
));
353 over_label
= gen_new_label();
354 tcg_gen_brcondi_i32(TCG_COND_LEU
, tmp
, 16, over_label
);
356 addr
= tcg_temp_new_i64();
357 tcg_gen_addi_i64(addr
, cpu_reg_sp(s
, rn
), imm
);
359 type
= is_write
? MMU_DATA_STORE
: MMU_DATA_LOAD
,
360 mmu_idx
= get_mem_index(s
);
361 gen_helper_unaligned_access(cpu_env
, addr
, tcg_constant_i32(type
),
362 tcg_constant_i32(mmu_idx
));
364 gen_set_label(over_label
);
368 /* Handle the alignment check for AccType_ATOMIC instructions. */
369 static MemOp
check_atomic_align(DisasContext
*s
, int rn
, MemOp mop
)
371 MemOp size
= mop
& MO_SIZE
;
378 * If size == MO_128, this is a LDXP, and the operation is single-copy
379 * atomic for each doubleword, not the entire quadword; it still must
380 * be quadword aligned.
382 if (size
== MO_128
) {
383 return finalize_memop_atom(s
, MO_128
| MO_ALIGN
,
384 MO_ATOM_IFALIGN_PAIR
);
386 if (dc_isar_feature(aa64_lse2
, s
)) {
387 check_lse2_align(s
, rn
, 0, true, mop
);
391 return finalize_memop(s
, mop
);
394 /* Handle the alignment check for AccType_ORDERED instructions. */
395 static MemOp
check_ordered_align(DisasContext
*s
, int rn
, int imm
,
396 bool is_write
, MemOp mop
)
398 MemOp size
= mop
& MO_SIZE
;
403 if (size
== MO_128
) {
404 return finalize_memop_atom(s
, MO_128
| MO_ALIGN
,
405 MO_ATOM_IFALIGN_PAIR
);
407 if (!dc_isar_feature(aa64_lse2
, s
)) {
409 } else if (!s
->naa
) {
410 check_lse2_align(s
, rn
, imm
, is_write
, mop
);
412 return finalize_memop(s
, mop
);
415 typedef struct DisasCompare64
{
420 static void a64_test_cc(DisasCompare64
*c64
, int cc
)
424 arm_test_cc(&c32
, cc
);
427 * Sign-extend the 32-bit value so that the GE/LT comparisons work
428 * properly. The NE/EQ comparisons are also fine with this choice.
430 c64
->cond
= c32
.cond
;
431 c64
->value
= tcg_temp_new_i64();
432 tcg_gen_ext_i32_i64(c64
->value
, c32
.value
);
435 static void gen_rebuild_hflags(DisasContext
*s
)
437 gen_helper_rebuild_hflags_a64(cpu_env
, tcg_constant_i32(s
->current_el
));
440 static void gen_exception_internal(int excp
)
442 assert(excp_is_internal(excp
));
443 gen_helper_exception_internal(cpu_env
, tcg_constant_i32(excp
));
446 static void gen_exception_internal_insn(DisasContext
*s
, int excp
)
448 gen_a64_update_pc(s
, 0);
449 gen_exception_internal(excp
);
450 s
->base
.is_jmp
= DISAS_NORETURN
;
453 static void gen_exception_bkpt_insn(DisasContext
*s
, uint32_t syndrome
)
455 gen_a64_update_pc(s
, 0);
456 gen_helper_exception_bkpt_insn(cpu_env
, tcg_constant_i32(syndrome
));
457 s
->base
.is_jmp
= DISAS_NORETURN
;
460 static void gen_step_complete_exception(DisasContext
*s
)
462 /* We just completed step of an insn. Move from Active-not-pending
463 * to Active-pending, and then also take the swstep exception.
464 * This corresponds to making the (IMPDEF) choice to prioritize
465 * swstep exceptions over asynchronous exceptions taken to an exception
466 * level where debug is disabled. This choice has the advantage that
467 * we do not need to maintain internal state corresponding to the
468 * ISV/EX syndrome bits between completion of the step and generation
469 * of the exception, and our syndrome information is always correct.
472 gen_swstep_exception(s
, 1, s
->is_ldex
);
473 s
->base
.is_jmp
= DISAS_NORETURN
;
476 static inline bool use_goto_tb(DisasContext
*s
, uint64_t dest
)
481 return translator_use_goto_tb(&s
->base
, dest
);
484 static void gen_goto_tb(DisasContext
*s
, int n
, int64_t diff
)
486 if (use_goto_tb(s
, s
->pc_curr
+ diff
)) {
488 * For pcrel, the pc must always be up-to-date on entry to
489 * the linked TB, so that it can use simple additions for all
490 * further adjustments. For !pcrel, the linked TB is compiled
491 * to know its full virtual address, so we can delay the
492 * update to pc to the unlinked path. A long chain of links
493 * can thus avoid many updates to the PC.
495 if (tb_cflags(s
->base
.tb
) & CF_PCREL
) {
496 gen_a64_update_pc(s
, diff
);
500 gen_a64_update_pc(s
, diff
);
502 tcg_gen_exit_tb(s
->base
.tb
, n
);
503 s
->base
.is_jmp
= DISAS_NORETURN
;
505 gen_a64_update_pc(s
, diff
);
507 gen_step_complete_exception(s
);
509 tcg_gen_lookup_and_goto_ptr();
510 s
->base
.is_jmp
= DISAS_NORETURN
;
516 * Register access functions
518 * These functions are used for directly accessing a register in where
519 * changes to the final register value are likely to be made. If you
520 * need to use a register for temporary calculation (e.g. index type
521 * operations) use the read_* form.
523 * B1.2.1 Register mappings
525 * In instruction register encoding 31 can refer to ZR (zero register) or
526 * the SP (stack pointer) depending on context. In QEMU's case we map SP
527 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
528 * This is the point of the _sp forms.
530 TCGv_i64
cpu_reg(DisasContext
*s
, int reg
)
533 TCGv_i64 t
= tcg_temp_new_i64();
534 tcg_gen_movi_i64(t
, 0);
541 /* register access for when 31 == SP */
542 TCGv_i64
cpu_reg_sp(DisasContext
*s
, int reg
)
547 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
548 * representing the register contents. This TCGv is an auto-freed
549 * temporary so it need not be explicitly freed, and may be modified.
551 TCGv_i64
read_cpu_reg(DisasContext
*s
, int reg
, int sf
)
553 TCGv_i64 v
= tcg_temp_new_i64();
556 tcg_gen_mov_i64(v
, cpu_X
[reg
]);
558 tcg_gen_ext32u_i64(v
, cpu_X
[reg
]);
561 tcg_gen_movi_i64(v
, 0);
566 TCGv_i64
read_cpu_reg_sp(DisasContext
*s
, int reg
, int sf
)
568 TCGv_i64 v
= tcg_temp_new_i64();
570 tcg_gen_mov_i64(v
, cpu_X
[reg
]);
572 tcg_gen_ext32u_i64(v
, cpu_X
[reg
]);
577 /* Return the offset into CPUARMState of a slice (from
578 * the least significant end) of FP register Qn (ie
580 * (Note that this is not the same mapping as for A32; see cpu.h)
582 static inline int fp_reg_offset(DisasContext
*s
, int regno
, MemOp size
)
584 return vec_reg_offset(s
, regno
, 0, size
);
587 /* Offset of the high half of the 128 bit vector Qn */
588 static inline int fp_reg_hi_offset(DisasContext
*s
, int regno
)
590 return vec_reg_offset(s
, regno
, 1, MO_64
);
593 /* Convenience accessors for reading and writing single and double
594 * FP registers. Writing clears the upper parts of the associated
595 * 128 bit vector register, as required by the architecture.
596 * Note that unlike the GP register accessors, the values returned
597 * by the read functions must be manually freed.
599 static TCGv_i64
read_fp_dreg(DisasContext
*s
, int reg
)
601 TCGv_i64 v
= tcg_temp_new_i64();
603 tcg_gen_ld_i64(v
, cpu_env
, fp_reg_offset(s
, reg
, MO_64
));
607 static TCGv_i32
read_fp_sreg(DisasContext
*s
, int reg
)
609 TCGv_i32 v
= tcg_temp_new_i32();
611 tcg_gen_ld_i32(v
, cpu_env
, fp_reg_offset(s
, reg
, MO_32
));
615 static TCGv_i32
read_fp_hreg(DisasContext
*s
, int reg
)
617 TCGv_i32 v
= tcg_temp_new_i32();
619 tcg_gen_ld16u_i32(v
, cpu_env
, fp_reg_offset(s
, reg
, MO_16
));
623 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
624 * If SVE is not enabled, then there are only 128 bits in the vector.
626 static void clear_vec_high(DisasContext
*s
, bool is_q
, int rd
)
628 unsigned ofs
= fp_reg_offset(s
, rd
, MO_64
);
629 unsigned vsz
= vec_full_reg_size(s
);
631 /* Nop move, with side effect of clearing the tail. */
632 tcg_gen_gvec_mov(MO_64
, ofs
, ofs
, is_q
? 16 : 8, vsz
);
635 void write_fp_dreg(DisasContext
*s
, int reg
, TCGv_i64 v
)
637 unsigned ofs
= fp_reg_offset(s
, reg
, MO_64
);
639 tcg_gen_st_i64(v
, cpu_env
, ofs
);
640 clear_vec_high(s
, false, reg
);
643 static void write_fp_sreg(DisasContext
*s
, int reg
, TCGv_i32 v
)
645 TCGv_i64 tmp
= tcg_temp_new_i64();
647 tcg_gen_extu_i32_i64(tmp
, v
);
648 write_fp_dreg(s
, reg
, tmp
);
651 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
652 static void gen_gvec_fn2(DisasContext
*s
, bool is_q
, int rd
, int rn
,
653 GVecGen2Fn
*gvec_fn
, int vece
)
655 gvec_fn(vece
, vec_full_reg_offset(s
, rd
), vec_full_reg_offset(s
, rn
),
656 is_q
? 16 : 8, vec_full_reg_size(s
));
659 /* Expand a 2-operand + immediate AdvSIMD vector operation using
660 * an expander function.
662 static void gen_gvec_fn2i(DisasContext
*s
, bool is_q
, int rd
, int rn
,
663 int64_t imm
, GVecGen2iFn
*gvec_fn
, int vece
)
665 gvec_fn(vece
, vec_full_reg_offset(s
, rd
), vec_full_reg_offset(s
, rn
),
666 imm
, is_q
? 16 : 8, vec_full_reg_size(s
));
669 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
670 static void gen_gvec_fn3(DisasContext
*s
, bool is_q
, int rd
, int rn
, int rm
,
671 GVecGen3Fn
*gvec_fn
, int vece
)
673 gvec_fn(vece
, vec_full_reg_offset(s
, rd
), vec_full_reg_offset(s
, rn
),
674 vec_full_reg_offset(s
, rm
), is_q
? 16 : 8, vec_full_reg_size(s
));
677 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
678 static void gen_gvec_fn4(DisasContext
*s
, bool is_q
, int rd
, int rn
, int rm
,
679 int rx
, GVecGen4Fn
*gvec_fn
, int vece
)
681 gvec_fn(vece
, vec_full_reg_offset(s
, rd
), vec_full_reg_offset(s
, rn
),
682 vec_full_reg_offset(s
, rm
), vec_full_reg_offset(s
, rx
),
683 is_q
? 16 : 8, vec_full_reg_size(s
));
686 /* Expand a 2-operand operation using an out-of-line helper. */
687 static void gen_gvec_op2_ool(DisasContext
*s
, bool is_q
, int rd
,
688 int rn
, int data
, gen_helper_gvec_2
*fn
)
690 tcg_gen_gvec_2_ool(vec_full_reg_offset(s
, rd
),
691 vec_full_reg_offset(s
, rn
),
692 is_q
? 16 : 8, vec_full_reg_size(s
), data
, fn
);
695 /* Expand a 3-operand operation using an out-of-line helper. */
696 static void gen_gvec_op3_ool(DisasContext
*s
, bool is_q
, int rd
,
697 int rn
, int rm
, int data
, gen_helper_gvec_3
*fn
)
699 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
700 vec_full_reg_offset(s
, rn
),
701 vec_full_reg_offset(s
, rm
),
702 is_q
? 16 : 8, vec_full_reg_size(s
), data
, fn
);
705 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
706 * an out-of-line helper.
708 static void gen_gvec_op3_fpst(DisasContext
*s
, bool is_q
, int rd
, int rn
,
709 int rm
, bool is_fp16
, int data
,
710 gen_helper_gvec_3_ptr
*fn
)
712 TCGv_ptr fpst
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
713 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
714 vec_full_reg_offset(s
, rn
),
715 vec_full_reg_offset(s
, rm
), fpst
,
716 is_q
? 16 : 8, vec_full_reg_size(s
), data
, fn
);
719 /* Expand a 3-operand + qc + operation using an out-of-line helper. */
720 static void gen_gvec_op3_qc(DisasContext
*s
, bool is_q
, int rd
, int rn
,
721 int rm
, gen_helper_gvec_3_ptr
*fn
)
723 TCGv_ptr qc_ptr
= tcg_temp_new_ptr();
725 tcg_gen_addi_ptr(qc_ptr
, cpu_env
, offsetof(CPUARMState
, vfp
.qc
));
726 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
727 vec_full_reg_offset(s
, rn
),
728 vec_full_reg_offset(s
, rm
), qc_ptr
,
729 is_q
? 16 : 8, vec_full_reg_size(s
), 0, fn
);
732 /* Expand a 4-operand operation using an out-of-line helper. */
733 static void gen_gvec_op4_ool(DisasContext
*s
, bool is_q
, int rd
, int rn
,
734 int rm
, int ra
, int data
, gen_helper_gvec_4
*fn
)
736 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
737 vec_full_reg_offset(s
, rn
),
738 vec_full_reg_offset(s
, rm
),
739 vec_full_reg_offset(s
, ra
),
740 is_q
? 16 : 8, vec_full_reg_size(s
), data
, fn
);
744 * Expand a 4-operand + fpstatus pointer + simd data value operation using
745 * an out-of-line helper.
747 static void gen_gvec_op4_fpst(DisasContext
*s
, bool is_q
, int rd
, int rn
,
748 int rm
, int ra
, bool is_fp16
, int data
,
749 gen_helper_gvec_4_ptr
*fn
)
751 TCGv_ptr fpst
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
752 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, rd
),
753 vec_full_reg_offset(s
, rn
),
754 vec_full_reg_offset(s
, rm
),
755 vec_full_reg_offset(s
, ra
), fpst
,
756 is_q
? 16 : 8, vec_full_reg_size(s
), data
, fn
);
759 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
760 * than the 32 bit equivalent.
762 static inline void gen_set_NZ64(TCGv_i64 result
)
764 tcg_gen_extr_i64_i32(cpu_ZF
, cpu_NF
, result
);
765 tcg_gen_or_i32(cpu_ZF
, cpu_ZF
, cpu_NF
);
768 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
769 static inline void gen_logic_CC(int sf
, TCGv_i64 result
)
772 gen_set_NZ64(result
);
774 tcg_gen_extrl_i64_i32(cpu_ZF
, result
);
775 tcg_gen_mov_i32(cpu_NF
, cpu_ZF
);
777 tcg_gen_movi_i32(cpu_CF
, 0);
778 tcg_gen_movi_i32(cpu_VF
, 0);
781 /* dest = T0 + T1; compute C, N, V and Z flags */
782 static void gen_add64_CC(TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
784 TCGv_i64 result
, flag
, tmp
;
785 result
= tcg_temp_new_i64();
786 flag
= tcg_temp_new_i64();
787 tmp
= tcg_temp_new_i64();
789 tcg_gen_movi_i64(tmp
, 0);
790 tcg_gen_add2_i64(result
, flag
, t0
, tmp
, t1
, tmp
);
792 tcg_gen_extrl_i64_i32(cpu_CF
, flag
);
794 gen_set_NZ64(result
);
796 tcg_gen_xor_i64(flag
, result
, t0
);
797 tcg_gen_xor_i64(tmp
, t0
, t1
);
798 tcg_gen_andc_i64(flag
, flag
, tmp
);
799 tcg_gen_extrh_i64_i32(cpu_VF
, flag
);
801 tcg_gen_mov_i64(dest
, result
);
804 static void gen_add32_CC(TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
806 TCGv_i32 t0_32
= tcg_temp_new_i32();
807 TCGv_i32 t1_32
= tcg_temp_new_i32();
808 TCGv_i32 tmp
= tcg_temp_new_i32();
810 tcg_gen_movi_i32(tmp
, 0);
811 tcg_gen_extrl_i64_i32(t0_32
, t0
);
812 tcg_gen_extrl_i64_i32(t1_32
, t1
);
813 tcg_gen_add2_i32(cpu_NF
, cpu_CF
, t0_32
, tmp
, t1_32
, tmp
);
814 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
815 tcg_gen_xor_i32(cpu_VF
, cpu_NF
, t0_32
);
816 tcg_gen_xor_i32(tmp
, t0_32
, t1_32
);
817 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, tmp
);
818 tcg_gen_extu_i32_i64(dest
, cpu_NF
);
821 static void gen_add_CC(int sf
, TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
824 gen_add64_CC(dest
, t0
, t1
);
826 gen_add32_CC(dest
, t0
, t1
);
830 /* dest = T0 - T1; compute C, N, V and Z flags */
831 static void gen_sub64_CC(TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
833 /* 64 bit arithmetic */
834 TCGv_i64 result
, flag
, tmp
;
836 result
= tcg_temp_new_i64();
837 flag
= tcg_temp_new_i64();
838 tcg_gen_sub_i64(result
, t0
, t1
);
840 gen_set_NZ64(result
);
842 tcg_gen_setcond_i64(TCG_COND_GEU
, flag
, t0
, t1
);
843 tcg_gen_extrl_i64_i32(cpu_CF
, flag
);
845 tcg_gen_xor_i64(flag
, result
, t0
);
846 tmp
= tcg_temp_new_i64();
847 tcg_gen_xor_i64(tmp
, t0
, t1
);
848 tcg_gen_and_i64(flag
, flag
, tmp
);
849 tcg_gen_extrh_i64_i32(cpu_VF
, flag
);
850 tcg_gen_mov_i64(dest
, result
);
853 static void gen_sub32_CC(TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
855 /* 32 bit arithmetic */
856 TCGv_i32 t0_32
= tcg_temp_new_i32();
857 TCGv_i32 t1_32
= tcg_temp_new_i32();
860 tcg_gen_extrl_i64_i32(t0_32
, t0
);
861 tcg_gen_extrl_i64_i32(t1_32
, t1
);
862 tcg_gen_sub_i32(cpu_NF
, t0_32
, t1_32
);
863 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
864 tcg_gen_setcond_i32(TCG_COND_GEU
, cpu_CF
, t0_32
, t1_32
);
865 tcg_gen_xor_i32(cpu_VF
, cpu_NF
, t0_32
);
866 tmp
= tcg_temp_new_i32();
867 tcg_gen_xor_i32(tmp
, t0_32
, t1_32
);
868 tcg_gen_and_i32(cpu_VF
, cpu_VF
, tmp
);
869 tcg_gen_extu_i32_i64(dest
, cpu_NF
);
872 static void gen_sub_CC(int sf
, TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
875 gen_sub64_CC(dest
, t0
, t1
);
877 gen_sub32_CC(dest
, t0
, t1
);
881 /* dest = T0 + T1 + CF; do not compute flags. */
882 static void gen_adc(int sf
, TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
884 TCGv_i64 flag
= tcg_temp_new_i64();
885 tcg_gen_extu_i32_i64(flag
, cpu_CF
);
886 tcg_gen_add_i64(dest
, t0
, t1
);
887 tcg_gen_add_i64(dest
, dest
, flag
);
890 tcg_gen_ext32u_i64(dest
, dest
);
894 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
895 static void gen_adc_CC(int sf
, TCGv_i64 dest
, TCGv_i64 t0
, TCGv_i64 t1
)
898 TCGv_i64 result
= tcg_temp_new_i64();
899 TCGv_i64 cf_64
= tcg_temp_new_i64();
900 TCGv_i64 vf_64
= tcg_temp_new_i64();
901 TCGv_i64 tmp
= tcg_temp_new_i64();
902 TCGv_i64 zero
= tcg_constant_i64(0);
904 tcg_gen_extu_i32_i64(cf_64
, cpu_CF
);
905 tcg_gen_add2_i64(result
, cf_64
, t0
, zero
, cf_64
, zero
);
906 tcg_gen_add2_i64(result
, cf_64
, result
, cf_64
, t1
, zero
);
907 tcg_gen_extrl_i64_i32(cpu_CF
, cf_64
);
908 gen_set_NZ64(result
);
910 tcg_gen_xor_i64(vf_64
, result
, t0
);
911 tcg_gen_xor_i64(tmp
, t0
, t1
);
912 tcg_gen_andc_i64(vf_64
, vf_64
, tmp
);
913 tcg_gen_extrh_i64_i32(cpu_VF
, vf_64
);
915 tcg_gen_mov_i64(dest
, result
);
917 TCGv_i32 t0_32
= tcg_temp_new_i32();
918 TCGv_i32 t1_32
= tcg_temp_new_i32();
919 TCGv_i32 tmp
= tcg_temp_new_i32();
920 TCGv_i32 zero
= tcg_constant_i32(0);
922 tcg_gen_extrl_i64_i32(t0_32
, t0
);
923 tcg_gen_extrl_i64_i32(t1_32
, t1
);
924 tcg_gen_add2_i32(cpu_NF
, cpu_CF
, t0_32
, zero
, cpu_CF
, zero
);
925 tcg_gen_add2_i32(cpu_NF
, cpu_CF
, cpu_NF
, cpu_CF
, t1_32
, zero
);
927 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
928 tcg_gen_xor_i32(cpu_VF
, cpu_NF
, t0_32
);
929 tcg_gen_xor_i32(tmp
, t0_32
, t1_32
);
930 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, tmp
);
931 tcg_gen_extu_i32_i64(dest
, cpu_NF
);
936 * Load/Store generators
940 * Store from GPR register to memory.
942 static void do_gpr_st_memidx(DisasContext
*s
, TCGv_i64 source
,
943 TCGv_i64 tcg_addr
, MemOp memop
, int memidx
,
945 unsigned int iss_srt
,
946 bool iss_sf
, bool iss_ar
)
948 tcg_gen_qemu_st_i64(source
, tcg_addr
, memidx
, memop
);
953 syn
= syn_data_abort_with_iss(0,
959 0, 0, 0, 0, 0, false);
960 disas_set_insn_syndrome(s
, syn
);
964 static void do_gpr_st(DisasContext
*s
, TCGv_i64 source
,
965 TCGv_i64 tcg_addr
, MemOp memop
,
967 unsigned int iss_srt
,
968 bool iss_sf
, bool iss_ar
)
970 do_gpr_st_memidx(s
, source
, tcg_addr
, memop
, get_mem_index(s
),
971 iss_valid
, iss_srt
, iss_sf
, iss_ar
);
975 * Load from memory to GPR register
977 static void do_gpr_ld_memidx(DisasContext
*s
, TCGv_i64 dest
, TCGv_i64 tcg_addr
,
978 MemOp memop
, bool extend
, int memidx
,
979 bool iss_valid
, unsigned int iss_srt
,
980 bool iss_sf
, bool iss_ar
)
982 tcg_gen_qemu_ld_i64(dest
, tcg_addr
, memidx
, memop
);
984 if (extend
&& (memop
& MO_SIGN
)) {
985 g_assert((memop
& MO_SIZE
) <= MO_32
);
986 tcg_gen_ext32u_i64(dest
, dest
);
992 syn
= syn_data_abort_with_iss(0,
994 (memop
& MO_SIGN
) != 0,
998 0, 0, 0, 0, 0, false);
999 disas_set_insn_syndrome(s
, syn
);
1003 static void do_gpr_ld(DisasContext
*s
, TCGv_i64 dest
, TCGv_i64 tcg_addr
,
1004 MemOp memop
, bool extend
,
1005 bool iss_valid
, unsigned int iss_srt
,
1006 bool iss_sf
, bool iss_ar
)
1008 do_gpr_ld_memidx(s
, dest
, tcg_addr
, memop
, extend
, get_mem_index(s
),
1009 iss_valid
, iss_srt
, iss_sf
, iss_ar
);
1013 * Store from FP register to memory
1015 static void do_fp_st(DisasContext
*s
, int srcidx
, TCGv_i64 tcg_addr
, MemOp mop
)
1017 /* This writes the bottom N bits of a 128 bit wide vector to memory */
1018 TCGv_i64 tmplo
= tcg_temp_new_i64();
1020 tcg_gen_ld_i64(tmplo
, cpu_env
, fp_reg_offset(s
, srcidx
, MO_64
));
1022 if ((mop
& MO_SIZE
) < MO_128
) {
1023 tcg_gen_qemu_st_i64(tmplo
, tcg_addr
, get_mem_index(s
), mop
);
1025 TCGv_i64 tmphi
= tcg_temp_new_i64();
1026 TCGv_i128 t16
= tcg_temp_new_i128();
1028 tcg_gen_ld_i64(tmphi
, cpu_env
, fp_reg_hi_offset(s
, srcidx
));
1029 tcg_gen_concat_i64_i128(t16
, tmplo
, tmphi
);
1031 tcg_gen_qemu_st_i128(t16
, tcg_addr
, get_mem_index(s
), mop
);
1036 * Load from memory to FP register
1038 static void do_fp_ld(DisasContext
*s
, int destidx
, TCGv_i64 tcg_addr
, MemOp mop
)
1040 /* This always zero-extends and writes to a full 128 bit wide vector */
1041 TCGv_i64 tmplo
= tcg_temp_new_i64();
1042 TCGv_i64 tmphi
= NULL
;
1044 if ((mop
& MO_SIZE
) < MO_128
) {
1045 tcg_gen_qemu_ld_i64(tmplo
, tcg_addr
, get_mem_index(s
), mop
);
1047 TCGv_i128 t16
= tcg_temp_new_i128();
1049 tcg_gen_qemu_ld_i128(t16
, tcg_addr
, get_mem_index(s
), mop
);
1051 tmphi
= tcg_temp_new_i64();
1052 tcg_gen_extr_i128_i64(tmplo
, tmphi
, t16
);
1055 tcg_gen_st_i64(tmplo
, cpu_env
, fp_reg_offset(s
, destidx
, MO_64
));
1058 tcg_gen_st_i64(tmphi
, cpu_env
, fp_reg_hi_offset(s
, destidx
));
1060 clear_vec_high(s
, tmphi
!= NULL
, destidx
);
1064 * Vector load/store helpers.
1066 * The principal difference between this and a FP load is that we don't
1067 * zero extend as we are filling a partial chunk of the vector register.
1068 * These functions don't support 128 bit loads/stores, which would be
1069 * normal load/store operations.
1071 * The _i32 versions are useful when operating on 32 bit quantities
1072 * (eg for floating point single or using Neon helper functions).
1075 /* Get value of an element within a vector register */
1076 static void read_vec_element(DisasContext
*s
, TCGv_i64 tcg_dest
, int srcidx
,
1077 int element
, MemOp memop
)
1079 int vect_off
= vec_reg_offset(s
, srcidx
, element
, memop
& MO_SIZE
);
1080 switch ((unsigned)memop
) {
1082 tcg_gen_ld8u_i64(tcg_dest
, cpu_env
, vect_off
);
1085 tcg_gen_ld16u_i64(tcg_dest
, cpu_env
, vect_off
);
1088 tcg_gen_ld32u_i64(tcg_dest
, cpu_env
, vect_off
);
1091 tcg_gen_ld8s_i64(tcg_dest
, cpu_env
, vect_off
);
1094 tcg_gen_ld16s_i64(tcg_dest
, cpu_env
, vect_off
);
1097 tcg_gen_ld32s_i64(tcg_dest
, cpu_env
, vect_off
);
1101 tcg_gen_ld_i64(tcg_dest
, cpu_env
, vect_off
);
1104 g_assert_not_reached();
1108 static void read_vec_element_i32(DisasContext
*s
, TCGv_i32 tcg_dest
, int srcidx
,
1109 int element
, MemOp memop
)
1111 int vect_off
= vec_reg_offset(s
, srcidx
, element
, memop
& MO_SIZE
);
1114 tcg_gen_ld8u_i32(tcg_dest
, cpu_env
, vect_off
);
1117 tcg_gen_ld16u_i32(tcg_dest
, cpu_env
, vect_off
);
1120 tcg_gen_ld8s_i32(tcg_dest
, cpu_env
, vect_off
);
1123 tcg_gen_ld16s_i32(tcg_dest
, cpu_env
, vect_off
);
1127 tcg_gen_ld_i32(tcg_dest
, cpu_env
, vect_off
);
1130 g_assert_not_reached();
1134 /* Set value of an element within a vector register */
1135 static void write_vec_element(DisasContext
*s
, TCGv_i64 tcg_src
, int destidx
,
1136 int element
, MemOp memop
)
1138 int vect_off
= vec_reg_offset(s
, destidx
, element
, memop
& MO_SIZE
);
1141 tcg_gen_st8_i64(tcg_src
, cpu_env
, vect_off
);
1144 tcg_gen_st16_i64(tcg_src
, cpu_env
, vect_off
);
1147 tcg_gen_st32_i64(tcg_src
, cpu_env
, vect_off
);
1150 tcg_gen_st_i64(tcg_src
, cpu_env
, vect_off
);
1153 g_assert_not_reached();
1157 static void write_vec_element_i32(DisasContext
*s
, TCGv_i32 tcg_src
,
1158 int destidx
, int element
, MemOp memop
)
1160 int vect_off
= vec_reg_offset(s
, destidx
, element
, memop
& MO_SIZE
);
1163 tcg_gen_st8_i32(tcg_src
, cpu_env
, vect_off
);
1166 tcg_gen_st16_i32(tcg_src
, cpu_env
, vect_off
);
1169 tcg_gen_st_i32(tcg_src
, cpu_env
, vect_off
);
1172 g_assert_not_reached();
1176 /* Store from vector register to memory */
1177 static void do_vec_st(DisasContext
*s
, int srcidx
, int element
,
1178 TCGv_i64 tcg_addr
, MemOp mop
)
1180 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
1182 read_vec_element(s
, tcg_tmp
, srcidx
, element
, mop
& MO_SIZE
);
1183 tcg_gen_qemu_st_i64(tcg_tmp
, tcg_addr
, get_mem_index(s
), mop
);
1186 /* Load from memory to vector register */
1187 static void do_vec_ld(DisasContext
*s
, int destidx
, int element
,
1188 TCGv_i64 tcg_addr
, MemOp mop
)
1190 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
1192 tcg_gen_qemu_ld_i64(tcg_tmp
, tcg_addr
, get_mem_index(s
), mop
);
1193 write_vec_element(s
, tcg_tmp
, destidx
, element
, mop
& MO_SIZE
);
1196 /* Check that FP/Neon access is enabled. If it is, return
1197 * true. If not, emit code to generate an appropriate exception,
1198 * and return false; the caller should not emit any code for
1199 * the instruction. Note that this check must happen after all
1200 * unallocated-encoding checks (otherwise the syndrome information
1201 * for the resulting exception will be incorrect).
1203 static bool fp_access_check_only(DisasContext
*s
)
1205 if (s
->fp_excp_el
) {
1206 assert(!s
->fp_access_checked
);
1207 s
->fp_access_checked
= true;
1209 gen_exception_insn_el(s
, 0, EXCP_UDEF
,
1210 syn_fp_access_trap(1, 0xe, false, 0),
1214 s
->fp_access_checked
= true;
1218 static bool fp_access_check(DisasContext
*s
)
1220 if (!fp_access_check_only(s
)) {
1223 if (s
->sme_trap_nonstreaming
&& s
->is_nonstreaming
) {
1224 gen_exception_insn(s
, 0, EXCP_UDEF
,
1225 syn_smetrap(SME_ET_Streaming
, false));
1232 * Check that SVE access is enabled. If it is, return true.
1233 * If not, emit code to generate an appropriate exception and return false.
1234 * This function corresponds to CheckSVEEnabled().
1236 bool sve_access_check(DisasContext
*s
)
1238 if (s
->pstate_sm
|| !dc_isar_feature(aa64_sve
, s
)) {
1239 assert(dc_isar_feature(aa64_sme
, s
));
1240 if (!sme_sm_enabled_check(s
)) {
1243 } else if (s
->sve_excp_el
) {
1244 gen_exception_insn_el(s
, 0, EXCP_UDEF
,
1245 syn_sve_access_trap(), s
->sve_excp_el
);
1248 s
->sve_access_checked
= true;
1249 return fp_access_check(s
);
1252 /* Assert that we only raise one exception per instruction. */
1253 assert(!s
->sve_access_checked
);
1254 s
->sve_access_checked
= true;
1259 * Check that SME access is enabled, raise an exception if not.
1260 * Note that this function corresponds to CheckSMEAccess and is
1261 * only used directly for cpregs.
1263 static bool sme_access_check(DisasContext
*s
)
1265 if (s
->sme_excp_el
) {
1266 gen_exception_insn_el(s
, 0, EXCP_UDEF
,
1267 syn_smetrap(SME_ET_AccessTrap
, false),
1274 /* This function corresponds to CheckSMEEnabled. */
1275 bool sme_enabled_check(DisasContext
*s
)
1278 * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1279 * to be zero when fp_excp_el has priority. This is because we need
1280 * sme_excp_el by itself for cpregs access checks.
1282 if (!s
->fp_excp_el
|| s
->sme_excp_el
< s
->fp_excp_el
) {
1283 s
->fp_access_checked
= true;
1284 return sme_access_check(s
);
1286 return fp_access_check_only(s
);
1289 /* Common subroutine for CheckSMEAnd*Enabled. */
1290 bool sme_enabled_check_with_svcr(DisasContext
*s
, unsigned req
)
1292 if (!sme_enabled_check(s
)) {
1295 if (FIELD_EX64(req
, SVCR
, SM
) && !s
->pstate_sm
) {
1296 gen_exception_insn(s
, 0, EXCP_UDEF
,
1297 syn_smetrap(SME_ET_NotStreaming
, false));
1300 if (FIELD_EX64(req
, SVCR
, ZA
) && !s
->pstate_za
) {
1301 gen_exception_insn(s
, 0, EXCP_UDEF
,
1302 syn_smetrap(SME_ET_InactiveZA
, false));
1309 * This utility function is for doing register extension with an
1310 * optional shift. You will likely want to pass a temporary for the
1311 * destination register. See DecodeRegExtend() in the ARM ARM.
1313 static void ext_and_shift_reg(TCGv_i64 tcg_out
, TCGv_i64 tcg_in
,
1314 int option
, unsigned int shift
)
1316 int extsize
= extract32(option
, 0, 2);
1317 bool is_signed
= extract32(option
, 2, 1);
1322 tcg_gen_ext8s_i64(tcg_out
, tcg_in
);
1325 tcg_gen_ext16s_i64(tcg_out
, tcg_in
);
1328 tcg_gen_ext32s_i64(tcg_out
, tcg_in
);
1331 tcg_gen_mov_i64(tcg_out
, tcg_in
);
1337 tcg_gen_ext8u_i64(tcg_out
, tcg_in
);
1340 tcg_gen_ext16u_i64(tcg_out
, tcg_in
);
1343 tcg_gen_ext32u_i64(tcg_out
, tcg_in
);
1346 tcg_gen_mov_i64(tcg_out
, tcg_in
);
1352 tcg_gen_shli_i64(tcg_out
, tcg_out
, shift
);
1356 static inline void gen_check_sp_alignment(DisasContext
*s
)
1358 /* The AArch64 architecture mandates that (if enabled via PSTATE
1359 * or SCTLR bits) there is a check that SP is 16-aligned on every
1360 * SP-relative load or store (with an exception generated if it is not).
1361 * In line with general QEMU practice regarding misaligned accesses,
1362 * we omit these checks for the sake of guest program performance.
1363 * This function is provided as a hook so we can more easily add these
1364 * checks in future (possibly as a "favour catching guest program bugs
1365 * over speed" user selectable option).
1370 * This provides a simple table based table lookup decoder. It is
1371 * intended to be used when the relevant bits for decode are too
1372 * awkwardly placed and switch/if based logic would be confusing and
1373 * deeply nested. Since it's a linear search through the table, tables
1374 * should be kept small.
1376 * It returns the first handler where insn & mask == pattern, or
1377 * NULL if there is no match.
1378 * The table is terminated by an empty mask (i.e. 0)
1380 static inline AArch64DecodeFn
*lookup_disas_fn(const AArch64DecodeTable
*table
,
1383 const AArch64DecodeTable
*tptr
= table
;
1385 while (tptr
->mask
) {
1386 if ((insn
& tptr
->mask
) == tptr
->pattern
) {
1387 return tptr
->disas_fn
;
1395 * The instruction disassembly implemented here matches
1396 * the instruction encoding classifications in chapter C4
1397 * of the ARM Architecture Reference Manual (DDI0487B_a);
1398 * classification names and decode diagrams here should generally
1399 * match up with those in the manual.
1402 static bool trans_B(DisasContext
*s
, arg_i
*a
)
1405 gen_goto_tb(s
, 0, a
->imm
);
1409 static bool trans_BL(DisasContext
*s
, arg_i
*a
)
1411 gen_pc_plus_diff(s
, cpu_reg(s
, 30), curr_insn_len(s
));
1413 gen_goto_tb(s
, 0, a
->imm
);
1418 static bool trans_CBZ(DisasContext
*s
, arg_cbz
*a
)
1423 tcg_cmp
= read_cpu_reg(s
, a
->rt
, a
->sf
);
1426 match
= gen_disas_label(s
);
1427 tcg_gen_brcondi_i64(a
->nz
? TCG_COND_NE
: TCG_COND_EQ
,
1428 tcg_cmp
, 0, match
.label
);
1429 gen_goto_tb(s
, 0, 4);
1430 set_disas_label(s
, match
);
1431 gen_goto_tb(s
, 1, a
->imm
);
1435 static bool trans_TBZ(DisasContext
*s
, arg_tbz
*a
)
1440 tcg_cmp
= tcg_temp_new_i64();
1441 tcg_gen_andi_i64(tcg_cmp
, cpu_reg(s
, a
->rt
), 1ULL << a
->bitpos
);
1445 match
= gen_disas_label(s
);
1446 tcg_gen_brcondi_i64(a
->nz
? TCG_COND_NE
: TCG_COND_EQ
,
1447 tcg_cmp
, 0, match
.label
);
1448 gen_goto_tb(s
, 0, 4);
1449 set_disas_label(s
, match
);
1450 gen_goto_tb(s
, 1, a
->imm
);
1454 static bool trans_B_cond(DisasContext
*s
, arg_B_cond
*a
)
1457 if (a
->cond
< 0x0e) {
1458 /* genuinely conditional branches */
1459 DisasLabel match
= gen_disas_label(s
);
1460 arm_gen_test_cc(a
->cond
, match
.label
);
1461 gen_goto_tb(s
, 0, 4);
1462 set_disas_label(s
, match
);
1463 gen_goto_tb(s
, 1, a
->imm
);
1465 /* 0xe and 0xf are both "always" conditions */
1466 gen_goto_tb(s
, 0, a
->imm
);
1471 static void set_btype_for_br(DisasContext
*s
, int rn
)
1473 if (dc_isar_feature(aa64_bti
, s
)) {
1474 /* BR to {x16,x17} or !guard -> 1, else 3. */
1475 set_btype(s
, rn
== 16 || rn
== 17 || !s
->guarded_page
? 1 : 3);
1479 static void set_btype_for_blr(DisasContext
*s
)
1481 if (dc_isar_feature(aa64_bti
, s
)) {
1482 /* BLR sets BTYPE to 2, regardless of source guarded page. */
1487 static bool trans_BR(DisasContext
*s
, arg_r
*a
)
1489 gen_a64_set_pc(s
, cpu_reg(s
, a
->rn
));
1490 set_btype_for_br(s
, a
->rn
);
1491 s
->base
.is_jmp
= DISAS_JUMP
;
1495 static bool trans_BLR(DisasContext
*s
, arg_r
*a
)
1497 TCGv_i64 dst
= cpu_reg(s
, a
->rn
);
1498 TCGv_i64 lr
= cpu_reg(s
, 30);
1500 TCGv_i64 tmp
= tcg_temp_new_i64();
1501 tcg_gen_mov_i64(tmp
, dst
);
1504 gen_pc_plus_diff(s
, lr
, curr_insn_len(s
));
1505 gen_a64_set_pc(s
, dst
);
1506 set_btype_for_blr(s
);
1507 s
->base
.is_jmp
= DISAS_JUMP
;
1511 static bool trans_RET(DisasContext
*s
, arg_r
*a
)
1513 gen_a64_set_pc(s
, cpu_reg(s
, a
->rn
));
1514 s
->base
.is_jmp
= DISAS_JUMP
;
1518 static TCGv_i64
auth_branch_target(DisasContext
*s
, TCGv_i64 dst
,
1519 TCGv_i64 modifier
, bool use_key_a
)
1523 * Return the branch target for a BRAA/RETA/etc, which is either
1524 * just the destination dst, or that value with the pauth check
1525 * done and the code removed from the high bits.
1527 if (!s
->pauth_active
) {
1531 truedst
= tcg_temp_new_i64();
1533 gen_helper_autia_combined(truedst
, cpu_env
, dst
, modifier
);
1535 gen_helper_autib_combined(truedst
, cpu_env
, dst
, modifier
);
1540 static bool trans_BRAZ(DisasContext
*s
, arg_braz
*a
)
1544 if (!dc_isar_feature(aa64_pauth
, s
)) {
1548 dst
= auth_branch_target(s
, cpu_reg(s
, a
->rn
), tcg_constant_i64(0), !a
->m
);
1549 gen_a64_set_pc(s
, dst
);
1550 set_btype_for_br(s
, a
->rn
);
1551 s
->base
.is_jmp
= DISAS_JUMP
;
1555 static bool trans_BLRAZ(DisasContext
*s
, arg_braz
*a
)
1559 if (!dc_isar_feature(aa64_pauth
, s
)) {
1563 dst
= auth_branch_target(s
, cpu_reg(s
, a
->rn
), tcg_constant_i64(0), !a
->m
);
1564 lr
= cpu_reg(s
, 30);
1566 TCGv_i64 tmp
= tcg_temp_new_i64();
1567 tcg_gen_mov_i64(tmp
, dst
);
1570 gen_pc_plus_diff(s
, lr
, curr_insn_len(s
));
1571 gen_a64_set_pc(s
, dst
);
1572 set_btype_for_blr(s
);
1573 s
->base
.is_jmp
= DISAS_JUMP
;
1577 static bool trans_RETA(DisasContext
*s
, arg_reta
*a
)
1581 dst
= auth_branch_target(s
, cpu_reg(s
, 30), cpu_X
[31], !a
->m
);
1582 gen_a64_set_pc(s
, dst
);
1583 s
->base
.is_jmp
= DISAS_JUMP
;
1587 static bool trans_BRA(DisasContext
*s
, arg_bra
*a
)
1591 if (!dc_isar_feature(aa64_pauth
, s
)) {
1594 dst
= auth_branch_target(s
, cpu_reg(s
,a
->rn
), cpu_reg_sp(s
, a
->rm
), !a
->m
);
1595 gen_a64_set_pc(s
, dst
);
1596 set_btype_for_br(s
, a
->rn
);
1597 s
->base
.is_jmp
= DISAS_JUMP
;
1601 static bool trans_BLRA(DisasContext
*s
, arg_bra
*a
)
1605 if (!dc_isar_feature(aa64_pauth
, s
)) {
1608 dst
= auth_branch_target(s
, cpu_reg(s
, a
->rn
), cpu_reg_sp(s
, a
->rm
), !a
->m
);
1609 lr
= cpu_reg(s
, 30);
1611 TCGv_i64 tmp
= tcg_temp_new_i64();
1612 tcg_gen_mov_i64(tmp
, dst
);
1615 gen_pc_plus_diff(s
, lr
, curr_insn_len(s
));
1616 gen_a64_set_pc(s
, dst
);
1617 set_btype_for_blr(s
);
1618 s
->base
.is_jmp
= DISAS_JUMP
;
1622 static bool trans_ERET(DisasContext
*s
, arg_ERET
*a
)
1626 if (s
->current_el
== 0) {
1630 gen_exception_insn_el(s
, 0, EXCP_UDEF
, 0, 2);
1633 dst
= tcg_temp_new_i64();
1634 tcg_gen_ld_i64(dst
, cpu_env
,
1635 offsetof(CPUARMState
, elr_el
[s
->current_el
]));
1637 translator_io_start(&s
->base
);
1639 gen_helper_exception_return(cpu_env
, dst
);
1640 /* Must exit loop to check un-masked IRQs */
1641 s
->base
.is_jmp
= DISAS_EXIT
;
1645 static bool trans_ERETA(DisasContext
*s
, arg_reta
*a
)
1649 if (!dc_isar_feature(aa64_pauth
, s
)) {
1652 if (s
->current_el
== 0) {
1655 /* The FGT trap takes precedence over an auth trap. */
1657 gen_exception_insn_el(s
, 0, EXCP_UDEF
, a
->m
? 3 : 2, 2);
1660 dst
= tcg_temp_new_i64();
1661 tcg_gen_ld_i64(dst
, cpu_env
,
1662 offsetof(CPUARMState
, elr_el
[s
->current_el
]));
1664 dst
= auth_branch_target(s
, dst
, cpu_X
[31], !a
->m
);
1666 translator_io_start(&s
->base
);
1668 gen_helper_exception_return(cpu_env
, dst
);
1669 /* Must exit loop to check un-masked IRQs */
1670 s
->base
.is_jmp
= DISAS_EXIT
;
1674 static bool trans_NOP(DisasContext
*s
, arg_NOP
*a
)
1679 static bool trans_YIELD(DisasContext
*s
, arg_YIELD
*a
)
1682 * When running in MTTCG we don't generate jumps to the yield and
1683 * WFE helpers as it won't affect the scheduling of other vCPUs.
1684 * If we wanted to more completely model WFE/SEV so we don't busy
1685 * spin unnecessarily we would need to do something more involved.
1687 if (!(tb_cflags(s
->base
.tb
) & CF_PARALLEL
)) {
1688 s
->base
.is_jmp
= DISAS_YIELD
;
1693 static bool trans_WFI(DisasContext
*s
, arg_WFI
*a
)
1695 s
->base
.is_jmp
= DISAS_WFI
;
1699 static bool trans_WFE(DisasContext
*s
, arg_WFI
*a
)
1702 * When running in MTTCG we don't generate jumps to the yield and
1703 * WFE helpers as it won't affect the scheduling of other vCPUs.
1704 * If we wanted to more completely model WFE/SEV so we don't busy
1705 * spin unnecessarily we would need to do something more involved.
1707 if (!(tb_cflags(s
->base
.tb
) & CF_PARALLEL
)) {
1708 s
->base
.is_jmp
= DISAS_WFE
;
1713 static bool trans_XPACLRI(DisasContext
*s
, arg_XPACLRI
*a
)
1715 if (s
->pauth_active
) {
1716 gen_helper_xpaci(cpu_X
[30], cpu_env
, cpu_X
[30]);
1721 static bool trans_PACIA1716(DisasContext
*s
, arg_PACIA1716
*a
)
1723 if (s
->pauth_active
) {
1724 gen_helper_pacia(cpu_X
[17], cpu_env
, cpu_X
[17], cpu_X
[16]);
1729 static bool trans_PACIB1716(DisasContext
*s
, arg_PACIB1716
*a
)
1731 if (s
->pauth_active
) {
1732 gen_helper_pacib(cpu_X
[17], cpu_env
, cpu_X
[17], cpu_X
[16]);
1737 static bool trans_AUTIA1716(DisasContext
*s
, arg_AUTIA1716
*a
)
1739 if (s
->pauth_active
) {
1740 gen_helper_autia(cpu_X
[17], cpu_env
, cpu_X
[17], cpu_X
[16]);
1745 static bool trans_AUTIB1716(DisasContext
*s
, arg_AUTIB1716
*a
)
1747 if (s
->pauth_active
) {
1748 gen_helper_autib(cpu_X
[17], cpu_env
, cpu_X
[17], cpu_X
[16]);
1753 static bool trans_ESB(DisasContext
*s
, arg_ESB
*a
)
1755 /* Without RAS, we must implement this as NOP. */
1756 if (dc_isar_feature(aa64_ras
, s
)) {
1758 * QEMU does not have a source of physical SErrors,
1759 * so we are only concerned with virtual SErrors.
1760 * The pseudocode in the ARM for this case is
1761 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1762 * AArch64.vESBOperation();
1763 * Most of the condition can be evaluated at translation time.
1764 * Test for EL2 present, and defer test for SEL2 to runtime.
1766 if (s
->current_el
<= 1 && arm_dc_feature(s
, ARM_FEATURE_EL2
)) {
1767 gen_helper_vesb(cpu_env
);
1773 static bool trans_PACIAZ(DisasContext
*s
, arg_PACIAZ
*a
)
1775 if (s
->pauth_active
) {
1776 gen_helper_pacia(cpu_X
[30], cpu_env
, cpu_X
[30], tcg_constant_i64(0));
1781 static bool trans_PACIASP(DisasContext
*s
, arg_PACIASP
*a
)
1783 if (s
->pauth_active
) {
1784 gen_helper_pacia(cpu_X
[30], cpu_env
, cpu_X
[30], cpu_X
[31]);
1789 static bool trans_PACIBZ(DisasContext
*s
, arg_PACIBZ
*a
)
1791 if (s
->pauth_active
) {
1792 gen_helper_pacib(cpu_X
[30], cpu_env
, cpu_X
[30], tcg_constant_i64(0));
1797 static bool trans_PACIBSP(DisasContext
*s
, arg_PACIBSP
*a
)
1799 if (s
->pauth_active
) {
1800 gen_helper_pacib(cpu_X
[30], cpu_env
, cpu_X
[30], cpu_X
[31]);
1805 static bool trans_AUTIAZ(DisasContext
*s
, arg_AUTIAZ
*a
)
1807 if (s
->pauth_active
) {
1808 gen_helper_autia(cpu_X
[30], cpu_env
, cpu_X
[30], tcg_constant_i64(0));
1813 static bool trans_AUTIASP(DisasContext
*s
, arg_AUTIASP
*a
)
1815 if (s
->pauth_active
) {
1816 gen_helper_autia(cpu_X
[30], cpu_env
, cpu_X
[30], cpu_X
[31]);
1821 static bool trans_AUTIBZ(DisasContext
*s
, arg_AUTIBZ
*a
)
1823 if (s
->pauth_active
) {
1824 gen_helper_autib(cpu_X
[30], cpu_env
, cpu_X
[30], tcg_constant_i64(0));
1829 static bool trans_AUTIBSP(DisasContext
*s
, arg_AUTIBSP
*a
)
1831 if (s
->pauth_active
) {
1832 gen_helper_autib(cpu_X
[30], cpu_env
, cpu_X
[30], cpu_X
[31]);
1837 static bool trans_CLREX(DisasContext
*s
, arg_CLREX
*a
)
1839 tcg_gen_movi_i64(cpu_exclusive_addr
, -1);
1843 static bool trans_DSB_DMB(DisasContext
*s
, arg_DSB_DMB
*a
)
1845 /* We handle DSB and DMB the same way */
1849 case 1: /* MBReqTypes_Reads */
1850 bar
= TCG_BAR_SC
| TCG_MO_LD_LD
| TCG_MO_LD_ST
;
1852 case 2: /* MBReqTypes_Writes */
1853 bar
= TCG_BAR_SC
| TCG_MO_ST_ST
;
1855 default: /* MBReqTypes_All */
1856 bar
= TCG_BAR_SC
| TCG_MO_ALL
;
1863 static bool trans_ISB(DisasContext
*s
, arg_ISB
*a
)
1866 * We need to break the TB after this insn to execute
1867 * self-modifying code correctly and also to take
1868 * any pending interrupts immediately.
1871 gen_goto_tb(s
, 0, 4);
1875 static bool trans_SB(DisasContext
*s
, arg_SB
*a
)
1877 if (!dc_isar_feature(aa64_sb
, s
)) {
1881 * TODO: There is no speculation barrier opcode for TCG;
1882 * MB and end the TB instead.
1884 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_SC
);
1885 gen_goto_tb(s
, 0, 4);
1889 static bool trans_CFINV(DisasContext
*s
, arg_CFINV
*a
)
1891 if (!dc_isar_feature(aa64_condm_4
, s
)) {
1894 tcg_gen_xori_i32(cpu_CF
, cpu_CF
, 1);
1898 static bool trans_XAFLAG(DisasContext
*s
, arg_XAFLAG
*a
)
1902 if (!dc_isar_feature(aa64_condm_5
, s
)) {
1906 z
= tcg_temp_new_i32();
1908 tcg_gen_setcondi_i32(TCG_COND_EQ
, z
, cpu_ZF
, 0);
1917 tcg_gen_or_i32(cpu_NF
, cpu_CF
, z
);
1918 tcg_gen_subi_i32(cpu_NF
, cpu_NF
, 1);
1921 tcg_gen_and_i32(cpu_ZF
, z
, cpu_CF
);
1922 tcg_gen_xori_i32(cpu_ZF
, cpu_ZF
, 1);
1924 /* (!C & Z) << 31 -> -(Z & ~C) */
1925 tcg_gen_andc_i32(cpu_VF
, z
, cpu_CF
);
1926 tcg_gen_neg_i32(cpu_VF
, cpu_VF
);
1929 tcg_gen_or_i32(cpu_CF
, cpu_CF
, z
);
1934 static bool trans_AXFLAG(DisasContext
*s
, arg_AXFLAG
*a
)
1936 if (!dc_isar_feature(aa64_condm_5
, s
)) {
1940 tcg_gen_sari_i32(cpu_VF
, cpu_VF
, 31); /* V ? -1 : 0 */
1941 tcg_gen_andc_i32(cpu_CF
, cpu_CF
, cpu_VF
); /* C & !V */
1943 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1944 tcg_gen_andc_i32(cpu_ZF
, cpu_ZF
, cpu_VF
);
1946 tcg_gen_movi_i32(cpu_NF
, 0);
1947 tcg_gen_movi_i32(cpu_VF
, 0);
1952 static bool trans_MSR_i_UAO(DisasContext
*s
, arg_i
*a
)
1954 if (!dc_isar_feature(aa64_uao
, s
) || s
->current_el
== 0) {
1958 set_pstate_bits(PSTATE_UAO
);
1960 clear_pstate_bits(PSTATE_UAO
);
1962 gen_rebuild_hflags(s
);
1963 s
->base
.is_jmp
= DISAS_TOO_MANY
;
1967 static bool trans_MSR_i_PAN(DisasContext
*s
, arg_i
*a
)
1969 if (!dc_isar_feature(aa64_pan
, s
) || s
->current_el
== 0) {
1973 set_pstate_bits(PSTATE_PAN
);
1975 clear_pstate_bits(PSTATE_PAN
);
1977 gen_rebuild_hflags(s
);
1978 s
->base
.is_jmp
= DISAS_TOO_MANY
;
1982 static bool trans_MSR_i_SPSEL(DisasContext
*s
, arg_i
*a
)
1984 if (s
->current_el
== 0) {
1987 gen_helper_msr_i_spsel(cpu_env
, tcg_constant_i32(a
->imm
& PSTATE_SP
));
1988 s
->base
.is_jmp
= DISAS_TOO_MANY
;
1992 static bool trans_MSR_i_SBSS(DisasContext
*s
, arg_i
*a
)
1994 if (!dc_isar_feature(aa64_ssbs
, s
)) {
1998 set_pstate_bits(PSTATE_SSBS
);
2000 clear_pstate_bits(PSTATE_SSBS
);
2002 /* Don't need to rebuild hflags since SSBS is a nop */
2003 s
->base
.is_jmp
= DISAS_TOO_MANY
;
2007 static bool trans_MSR_i_DIT(DisasContext
*s
, arg_i
*a
)
2009 if (!dc_isar_feature(aa64_dit
, s
)) {
2013 set_pstate_bits(PSTATE_DIT
);
2015 clear_pstate_bits(PSTATE_DIT
);
2017 /* There's no need to rebuild hflags because DIT is a nop */
2018 s
->base
.is_jmp
= DISAS_TOO_MANY
;
2022 static bool trans_MSR_i_TCO(DisasContext
*s
, arg_i
*a
)
2024 if (dc_isar_feature(aa64_mte
, s
)) {
2025 /* Full MTE is enabled -- set the TCO bit as directed. */
2027 set_pstate_bits(PSTATE_TCO
);
2029 clear_pstate_bits(PSTATE_TCO
);
2031 gen_rebuild_hflags(s
);
2032 /* Many factors, including TCO, go into MTE_ACTIVE. */
2033 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
2035 } else if (dc_isar_feature(aa64_mte_insn_reg
, s
)) {
2036 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
2039 /* Insn not present */
2044 static bool trans_MSR_i_DAIFSET(DisasContext
*s
, arg_i
*a
)
2046 gen_helper_msr_i_daifset(cpu_env
, tcg_constant_i32(a
->imm
));
2047 s
->base
.is_jmp
= DISAS_TOO_MANY
;
2051 static bool trans_MSR_i_DAIFCLEAR(DisasContext
*s
, arg_i
*a
)
2053 gen_helper_msr_i_daifclear(cpu_env
, tcg_constant_i32(a
->imm
));
2054 /* Exit the cpu loop to re-evaluate pending IRQs. */
2055 s
->base
.is_jmp
= DISAS_UPDATE_EXIT
;
2059 static bool trans_MSR_i_SVCR(DisasContext
*s
, arg_MSR_i_SVCR
*a
)
2061 if (!dc_isar_feature(aa64_sme
, s
) || a
->mask
== 0) {
2064 if (sme_access_check(s
)) {
2065 int old
= s
->pstate_sm
| (s
->pstate_za
<< 1);
2066 int new = a
->imm
* 3;
2068 if ((old
^ new) & a
->mask
) {
2069 /* At least one bit changes. */
2070 gen_helper_set_svcr(cpu_env
, tcg_constant_i32(new),
2071 tcg_constant_i32(a
->mask
));
2072 s
->base
.is_jmp
= DISAS_TOO_MANY
;
2078 static void gen_get_nzcv(TCGv_i64 tcg_rt
)
2080 TCGv_i32 tmp
= tcg_temp_new_i32();
2081 TCGv_i32 nzcv
= tcg_temp_new_i32();
2083 /* build bit 31, N */
2084 tcg_gen_andi_i32(nzcv
, cpu_NF
, (1U << 31));
2085 /* build bit 30, Z */
2086 tcg_gen_setcondi_i32(TCG_COND_EQ
, tmp
, cpu_ZF
, 0);
2087 tcg_gen_deposit_i32(nzcv
, nzcv
, tmp
, 30, 1);
2088 /* build bit 29, C */
2089 tcg_gen_deposit_i32(nzcv
, nzcv
, cpu_CF
, 29, 1);
2090 /* build bit 28, V */
2091 tcg_gen_shri_i32(tmp
, cpu_VF
, 31);
2092 tcg_gen_deposit_i32(nzcv
, nzcv
, tmp
, 28, 1);
2093 /* generate result */
2094 tcg_gen_extu_i32_i64(tcg_rt
, nzcv
);
2097 static void gen_set_nzcv(TCGv_i64 tcg_rt
)
2099 TCGv_i32 nzcv
= tcg_temp_new_i32();
2101 /* take NZCV from R[t] */
2102 tcg_gen_extrl_i64_i32(nzcv
, tcg_rt
);
2105 tcg_gen_andi_i32(cpu_NF
, nzcv
, (1U << 31));
2107 tcg_gen_andi_i32(cpu_ZF
, nzcv
, (1 << 30));
2108 tcg_gen_setcondi_i32(TCG_COND_EQ
, cpu_ZF
, cpu_ZF
, 0);
2110 tcg_gen_andi_i32(cpu_CF
, nzcv
, (1 << 29));
2111 tcg_gen_shri_i32(cpu_CF
, cpu_CF
, 29);
2113 tcg_gen_andi_i32(cpu_VF
, nzcv
, (1 << 28));
2114 tcg_gen_shli_i32(cpu_VF
, cpu_VF
, 3);
2117 static void gen_sysreg_undef(DisasContext
*s
, bool isread
,
2118 uint8_t op0
, uint8_t op1
, uint8_t op2
,
2119 uint8_t crn
, uint8_t crm
, uint8_t rt
)
2122 * Generate code to emit an UNDEF with correct syndrome
2123 * information for a failed system register access.
2124 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2125 * but if FEAT_IDST is implemented then read accesses to registers
2126 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2131 if (isread
&& dc_isar_feature(aa64_ids
, s
) &&
2132 arm_cpreg_encoding_in_idspace(op0
, op1
, op2
, crn
, crm
)) {
2133 syndrome
= syn_aa64_sysregtrap(op0
, op1
, op2
, crn
, crm
, rt
, isread
);
2135 syndrome
= syn_uncategorized();
2137 gen_exception_insn(s
, 0, EXCP_UDEF
, syndrome
);
2140 /* MRS - move from system register
2141 * MSR (register) - move to system register
2144 * These are all essentially the same insn in 'read' and 'write'
2145 * versions, with varying op0 fields.
2147 static void handle_sys(DisasContext
*s
, bool isread
,
2148 unsigned int op0
, unsigned int op1
, unsigned int op2
,
2149 unsigned int crn
, unsigned int crm
, unsigned int rt
)
2151 uint32_t key
= ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP
,
2152 crn
, crm
, op0
, op1
, op2
);
2153 const ARMCPRegInfo
*ri
= get_arm_cp_reginfo(s
->cp_regs
, key
);
2154 bool need_exit_tb
= false;
2155 TCGv_ptr tcg_ri
= NULL
;
2159 if (crn
== 11 || crn
== 15) {
2161 * Check for TIDCP trap, which must take precedence over
2162 * the UNDEF for "no such register" etc.
2164 syndrome
= syn_aa64_sysregtrap(op0
, op1
, op2
, crn
, crm
, rt
, isread
);
2165 switch (s
->current_el
) {
2167 gen_helper_tidcp_el1(cpu_env
, tcg_constant_i32(syndrome
));
2173 /* Unknown register; this might be a guest error or a QEMU
2174 * unimplemented feature.
2176 qemu_log_mask(LOG_UNIMP
, "%s access to unsupported AArch64 "
2177 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2178 isread
? "read" : "write", op0
, op1
, crn
, crm
, op2
);
2179 gen_sysreg_undef(s
, isread
, op0
, op1
, op2
, crn
, crm
, rt
);
2183 /* Check access permissions */
2184 if (!cp_access_ok(s
->current_el
, ri
, isread
)) {
2185 gen_sysreg_undef(s
, isread
, op0
, op1
, op2
, crn
, crm
, rt
);
2189 if (ri
->accessfn
|| (ri
->fgt
&& s
->fgt_active
)) {
2190 /* Emit code to perform further access permissions checks at
2191 * runtime; this may result in an exception.
2193 syndrome
= syn_aa64_sysregtrap(op0
, op1
, op2
, crn
, crm
, rt
, isread
);
2194 gen_a64_update_pc(s
, 0);
2195 tcg_ri
= tcg_temp_new_ptr();
2196 gen_helper_access_check_cp_reg(tcg_ri
, cpu_env
,
2197 tcg_constant_i32(key
),
2198 tcg_constant_i32(syndrome
),
2199 tcg_constant_i32(isread
));
2200 } else if (ri
->type
& ARM_CP_RAISES_EXC
) {
2202 * The readfn or writefn might raise an exception;
2203 * synchronize the CPU state in case it does.
2205 gen_a64_update_pc(s
, 0);
2208 /* Handle special cases first */
2209 switch (ri
->type
& ARM_CP_SPECIAL_MASK
) {
2215 tcg_rt
= cpu_reg(s
, rt
);
2217 gen_get_nzcv(tcg_rt
);
2219 gen_set_nzcv(tcg_rt
);
2222 case ARM_CP_CURRENTEL
:
2223 /* Reads as current EL value from pstate, which is
2224 * guaranteed to be constant by the tb flags.
2226 tcg_rt
= cpu_reg(s
, rt
);
2227 tcg_gen_movi_i64(tcg_rt
, s
->current_el
<< 2);
2230 /* Writes clear the aligned block of memory which rt points into. */
2231 if (s
->mte_active
[0]) {
2234 desc
= FIELD_DP32(desc
, MTEDESC
, MIDX
, get_mem_index(s
));
2235 desc
= FIELD_DP32(desc
, MTEDESC
, TBI
, s
->tbid
);
2236 desc
= FIELD_DP32(desc
, MTEDESC
, TCMA
, s
->tcma
);
2238 tcg_rt
= tcg_temp_new_i64();
2239 gen_helper_mte_check_zva(tcg_rt
, cpu_env
,
2240 tcg_constant_i32(desc
), cpu_reg(s
, rt
));
2242 tcg_rt
= clean_data_tbi(s
, cpu_reg(s
, rt
));
2244 gen_helper_dc_zva(cpu_env
, tcg_rt
);
2248 TCGv_i64 clean_addr
, tag
;
2251 * DC_GVA, like DC_ZVA, requires that we supply the original
2252 * pointer for an invalid page. Probe that address first.
2254 tcg_rt
= cpu_reg(s
, rt
);
2255 clean_addr
= clean_data_tbi(s
, tcg_rt
);
2256 gen_probe_access(s
, clean_addr
, MMU_DATA_STORE
, MO_8
);
2259 /* Extract the tag from the register to match STZGM. */
2260 tag
= tcg_temp_new_i64();
2261 tcg_gen_shri_i64(tag
, tcg_rt
, 56);
2262 gen_helper_stzgm_tags(cpu_env
, clean_addr
, tag
);
2266 case ARM_CP_DC_GZVA
:
2268 TCGv_i64 clean_addr
, tag
;
2270 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2271 tcg_rt
= cpu_reg(s
, rt
);
2272 clean_addr
= clean_data_tbi(s
, tcg_rt
);
2273 gen_helper_dc_zva(cpu_env
, clean_addr
);
2276 /* Extract the tag from the register to match STZGM. */
2277 tag
= tcg_temp_new_i64();
2278 tcg_gen_shri_i64(tag
, tcg_rt
, 56);
2279 gen_helper_stzgm_tags(cpu_env
, clean_addr
, tag
);
2284 g_assert_not_reached();
2286 if ((ri
->type
& ARM_CP_FPU
) && !fp_access_check_only(s
)) {
2288 } else if ((ri
->type
& ARM_CP_SVE
) && !sve_access_check(s
)) {
2290 } else if ((ri
->type
& ARM_CP_SME
) && !sme_access_check(s
)) {
2294 if (ri
->type
& ARM_CP_IO
) {
2295 /* I/O operations must end the TB here (whether read or write) */
2296 need_exit_tb
= translator_io_start(&s
->base
);
2299 tcg_rt
= cpu_reg(s
, rt
);
2302 if (ri
->type
& ARM_CP_CONST
) {
2303 tcg_gen_movi_i64(tcg_rt
, ri
->resetvalue
);
2304 } else if (ri
->readfn
) {
2306 tcg_ri
= gen_lookup_cp_reg(key
);
2308 gen_helper_get_cp_reg64(tcg_rt
, cpu_env
, tcg_ri
);
2310 tcg_gen_ld_i64(tcg_rt
, cpu_env
, ri
->fieldoffset
);
2313 if (ri
->type
& ARM_CP_CONST
) {
2314 /* If not forbidden by access permissions, treat as WI */
2316 } else if (ri
->writefn
) {
2318 tcg_ri
= gen_lookup_cp_reg(key
);
2320 gen_helper_set_cp_reg64(cpu_env
, tcg_ri
, tcg_rt
);
2322 tcg_gen_st_i64(tcg_rt
, cpu_env
, ri
->fieldoffset
);
2326 if (!isread
&& !(ri
->type
& ARM_CP_SUPPRESS_TB_END
)) {
2328 * A write to any coprocessor register that ends a TB
2329 * must rebuild the hflags for the next TB.
2331 gen_rebuild_hflags(s
);
2333 * We default to ending the TB on a coprocessor register write,
2334 * but allow this to be suppressed by the register definition
2335 * (usually only necessary to work around guest bugs).
2337 need_exit_tb
= true;
2340 s
->base
.is_jmp
= DISAS_UPDATE_EXIT
;
2344 static bool trans_SYS(DisasContext
*s
, arg_SYS
*a
)
2346 handle_sys(s
, a
->l
, a
->op0
, a
->op1
, a
->op2
, a
->crn
, a
->crm
, a
->rt
);
2350 static bool trans_SVC(DisasContext
*s
, arg_i
*a
)
2353 * For SVC, HVC and SMC we advance the single-step state
2354 * machine before taking the exception. This is architecturally
2355 * mandated, to ensure that single-stepping a system call
2356 * instruction works properly.
2358 uint32_t syndrome
= syn_aa64_svc(a
->imm
);
2360 gen_exception_insn_el(s
, 0, EXCP_UDEF
, syndrome
, 2);
2364 gen_exception_insn(s
, 4, EXCP_SWI
, syndrome
);
2368 static bool trans_HVC(DisasContext
*s
, arg_i
*a
)
2370 if (s
->current_el
== 0) {
2371 unallocated_encoding(s
);
2375 * The pre HVC helper handles cases when HVC gets trapped
2376 * as an undefined insn by runtime configuration.
2378 gen_a64_update_pc(s
, 0);
2379 gen_helper_pre_hvc(cpu_env
);
2380 /* Architecture requires ss advance before we do the actual work */
2382 gen_exception_insn_el(s
, 4, EXCP_HVC
, syn_aa64_hvc(a
->imm
), 2);
2386 static bool trans_SMC(DisasContext
*s
, arg_i
*a
)
2388 if (s
->current_el
== 0) {
2389 unallocated_encoding(s
);
2392 gen_a64_update_pc(s
, 0);
2393 gen_helper_pre_smc(cpu_env
, tcg_constant_i32(syn_aa64_smc(a
->imm
)));
2394 /* Architecture requires ss advance before we do the actual work */
2396 gen_exception_insn_el(s
, 4, EXCP_SMC
, syn_aa64_smc(a
->imm
), 3);
2400 static bool trans_BRK(DisasContext
*s
, arg_i
*a
)
2402 gen_exception_bkpt_insn(s
, syn_aa64_bkpt(a
->imm
));
2406 static bool trans_HLT(DisasContext
*s
, arg_i
*a
)
2409 * HLT. This has two purposes.
2410 * Architecturally, it is an external halting debug instruction.
2411 * Since QEMU doesn't implement external debug, we treat this as
2412 * it is required for halting debug disabled: it will UNDEF.
2413 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2415 if (semihosting_enabled(s
->current_el
== 0) && a
->imm
== 0xf000) {
2416 gen_exception_internal_insn(s
, EXCP_SEMIHOST
);
2418 unallocated_encoding(s
);
2424 * Load/Store exclusive instructions are implemented by remembering
2425 * the value/address loaded, and seeing if these are the same
2426 * when the store is performed. This is not actually the architecturally
2427 * mandated semantics, but it works for typical guest code sequences
2428 * and avoids having to monitor regular stores.
2430 * The store exclusive uses the atomic cmpxchg primitives to avoid
2431 * races in multi-threaded linux-user and when MTTCG softmmu is
2434 static void gen_load_exclusive(DisasContext
*s
, int rt
, int rt2
, int rn
,
2435 int size
, bool is_pair
)
2437 int idx
= get_mem_index(s
);
2438 TCGv_i64 dirty_addr
, clean_addr
;
2439 MemOp memop
= check_atomic_align(s
, rn
, size
+ is_pair
);
2442 dirty_addr
= cpu_reg_sp(s
, rn
);
2443 clean_addr
= gen_mte_check1(s
, dirty_addr
, false, rn
!= 31, memop
);
2445 g_assert(size
<= 3);
2447 g_assert(size
>= 2);
2449 tcg_gen_qemu_ld_i64(cpu_exclusive_val
, clean_addr
, idx
, memop
);
2450 if (s
->be_data
== MO_LE
) {
2451 tcg_gen_extract_i64(cpu_reg(s
, rt
), cpu_exclusive_val
, 0, 32);
2452 tcg_gen_extract_i64(cpu_reg(s
, rt2
), cpu_exclusive_val
, 32, 32);
2454 tcg_gen_extract_i64(cpu_reg(s
, rt
), cpu_exclusive_val
, 32, 32);
2455 tcg_gen_extract_i64(cpu_reg(s
, rt2
), cpu_exclusive_val
, 0, 32);
2458 TCGv_i128 t16
= tcg_temp_new_i128();
2460 tcg_gen_qemu_ld_i128(t16
, clean_addr
, idx
, memop
);
2462 if (s
->be_data
== MO_LE
) {
2463 tcg_gen_extr_i128_i64(cpu_exclusive_val
,
2464 cpu_exclusive_high
, t16
);
2466 tcg_gen_extr_i128_i64(cpu_exclusive_high
,
2467 cpu_exclusive_val
, t16
);
2469 tcg_gen_mov_i64(cpu_reg(s
, rt
), cpu_exclusive_val
);
2470 tcg_gen_mov_i64(cpu_reg(s
, rt2
), cpu_exclusive_high
);
2473 tcg_gen_qemu_ld_i64(cpu_exclusive_val
, clean_addr
, idx
, memop
);
2474 tcg_gen_mov_i64(cpu_reg(s
, rt
), cpu_exclusive_val
);
2476 tcg_gen_mov_i64(cpu_exclusive_addr
, clean_addr
);
2479 static void gen_store_exclusive(DisasContext
*s
, int rd
, int rt
, int rt2
,
2480 int rn
, int size
, int is_pair
)
2482 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2483 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2486 * [addr + datasize] = {Rt2};
2492 * env->exclusive_addr = -1;
2494 TCGLabel
*fail_label
= gen_new_label();
2495 TCGLabel
*done_label
= gen_new_label();
2496 TCGv_i64 tmp
, clean_addr
;
2500 * FIXME: We are out of spec here. We have recorded only the address
2501 * from load_exclusive, not the entire range, and we assume that the
2502 * size of the access on both sides match. The architecture allows the
2503 * store to be smaller than the load, so long as the stored bytes are
2504 * within the range recorded by the load.
2507 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2508 clean_addr
= clean_data_tbi(s
, cpu_reg_sp(s
, rn
));
2509 tcg_gen_brcond_i64(TCG_COND_NE
, clean_addr
, cpu_exclusive_addr
, fail_label
);
2512 * The write, and any associated faults, only happen if the virtual
2513 * and physical addresses pass the exclusive monitor check. These
2514 * faults are exceedingly unlikely, because normally the guest uses
2515 * the exact same address register for the load_exclusive, and we
2516 * would have recognized these faults there.
2518 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2519 * unaligned 4-byte write within the range of an aligned 8-byte load.
2520 * With LSE2, the store would need to cross a 16-byte boundary when the
2521 * load did not, which would mean the store is outside the range
2522 * recorded for the monitor, which would have failed a corrected monitor
2523 * check above. For now, we assume no size change and retain the
2524 * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2526 * It is possible to trigger an MTE fault, by performing the load with
2527 * a virtual address with a valid tag and performing the store with the
2528 * same virtual address and a different invalid tag.
2530 memop
= size
+ is_pair
;
2531 if (memop
== MO_128
|| !dc_isar_feature(aa64_lse2
, s
)) {
2534 memop
= finalize_memop(s
, memop
);
2535 gen_mte_check1(s
, cpu_reg_sp(s
, rn
), true, rn
!= 31, memop
);
2537 tmp
= tcg_temp_new_i64();
2540 if (s
->be_data
== MO_LE
) {
2541 tcg_gen_concat32_i64(tmp
, cpu_reg(s
, rt
), cpu_reg(s
, rt2
));
2543 tcg_gen_concat32_i64(tmp
, cpu_reg(s
, rt2
), cpu_reg(s
, rt
));
2545 tcg_gen_atomic_cmpxchg_i64(tmp
, cpu_exclusive_addr
,
2546 cpu_exclusive_val
, tmp
,
2547 get_mem_index(s
), memop
);
2548 tcg_gen_setcond_i64(TCG_COND_NE
, tmp
, tmp
, cpu_exclusive_val
);
2550 TCGv_i128 t16
= tcg_temp_new_i128();
2551 TCGv_i128 c16
= tcg_temp_new_i128();
2554 if (s
->be_data
== MO_LE
) {
2555 tcg_gen_concat_i64_i128(t16
, cpu_reg(s
, rt
), cpu_reg(s
, rt2
));
2556 tcg_gen_concat_i64_i128(c16
, cpu_exclusive_val
,
2557 cpu_exclusive_high
);
2559 tcg_gen_concat_i64_i128(t16
, cpu_reg(s
, rt2
), cpu_reg(s
, rt
));
2560 tcg_gen_concat_i64_i128(c16
, cpu_exclusive_high
,
2564 tcg_gen_atomic_cmpxchg_i128(t16
, cpu_exclusive_addr
, c16
, t16
,
2565 get_mem_index(s
), memop
);
2567 a
= tcg_temp_new_i64();
2568 b
= tcg_temp_new_i64();
2569 if (s
->be_data
== MO_LE
) {
2570 tcg_gen_extr_i128_i64(a
, b
, t16
);
2572 tcg_gen_extr_i128_i64(b
, a
, t16
);
2575 tcg_gen_xor_i64(a
, a
, cpu_exclusive_val
);
2576 tcg_gen_xor_i64(b
, b
, cpu_exclusive_high
);
2577 tcg_gen_or_i64(tmp
, a
, b
);
2579 tcg_gen_setcondi_i64(TCG_COND_NE
, tmp
, tmp
, 0);
2582 tcg_gen_atomic_cmpxchg_i64(tmp
, cpu_exclusive_addr
, cpu_exclusive_val
,
2583 cpu_reg(s
, rt
), get_mem_index(s
), memop
);
2584 tcg_gen_setcond_i64(TCG_COND_NE
, tmp
, tmp
, cpu_exclusive_val
);
2586 tcg_gen_mov_i64(cpu_reg(s
, rd
), tmp
);
2587 tcg_gen_br(done_label
);
2589 gen_set_label(fail_label
);
2590 tcg_gen_movi_i64(cpu_reg(s
, rd
), 1);
2591 gen_set_label(done_label
);
2592 tcg_gen_movi_i64(cpu_exclusive_addr
, -1);
2595 static void gen_compare_and_swap(DisasContext
*s
, int rs
, int rt
,
2598 TCGv_i64 tcg_rs
= cpu_reg(s
, rs
);
2599 TCGv_i64 tcg_rt
= cpu_reg(s
, rt
);
2600 int memidx
= get_mem_index(s
);
2601 TCGv_i64 clean_addr
;
2605 gen_check_sp_alignment(s
);
2607 memop
= check_atomic_align(s
, rn
, size
);
2608 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, rn
), true, rn
!= 31, memop
);
2609 tcg_gen_atomic_cmpxchg_i64(tcg_rs
, clean_addr
, tcg_rs
, tcg_rt
,
2613 static void gen_compare_and_swap_pair(DisasContext
*s
, int rs
, int rt
,
2616 TCGv_i64 s1
= cpu_reg(s
, rs
);
2617 TCGv_i64 s2
= cpu_reg(s
, rs
+ 1);
2618 TCGv_i64 t1
= cpu_reg(s
, rt
);
2619 TCGv_i64 t2
= cpu_reg(s
, rt
+ 1);
2620 TCGv_i64 clean_addr
;
2621 int memidx
= get_mem_index(s
);
2625 gen_check_sp_alignment(s
);
2628 /* This is a single atomic access, despite the "pair". */
2629 memop
= check_atomic_align(s
, rn
, size
+ 1);
2630 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, rn
), true, rn
!= 31, memop
);
2633 TCGv_i64 cmp
= tcg_temp_new_i64();
2634 TCGv_i64 val
= tcg_temp_new_i64();
2636 if (s
->be_data
== MO_LE
) {
2637 tcg_gen_concat32_i64(val
, t1
, t2
);
2638 tcg_gen_concat32_i64(cmp
, s1
, s2
);
2640 tcg_gen_concat32_i64(val
, t2
, t1
);
2641 tcg_gen_concat32_i64(cmp
, s2
, s1
);
2644 tcg_gen_atomic_cmpxchg_i64(cmp
, clean_addr
, cmp
, val
, memidx
, memop
);
2646 if (s
->be_data
== MO_LE
) {
2647 tcg_gen_extr32_i64(s1
, s2
, cmp
);
2649 tcg_gen_extr32_i64(s2
, s1
, cmp
);
2652 TCGv_i128 cmp
= tcg_temp_new_i128();
2653 TCGv_i128 val
= tcg_temp_new_i128();
2655 if (s
->be_data
== MO_LE
) {
2656 tcg_gen_concat_i64_i128(val
, t1
, t2
);
2657 tcg_gen_concat_i64_i128(cmp
, s1
, s2
);
2659 tcg_gen_concat_i64_i128(val
, t2
, t1
);
2660 tcg_gen_concat_i64_i128(cmp
, s2
, s1
);
2663 tcg_gen_atomic_cmpxchg_i128(cmp
, clean_addr
, cmp
, val
, memidx
, memop
);
2665 if (s
->be_data
== MO_LE
) {
2666 tcg_gen_extr_i128_i64(s1
, s2
, cmp
);
2668 tcg_gen_extr_i128_i64(s2
, s1
, cmp
);
2674 * Compute the ISS.SF bit for syndrome information if an exception
2675 * is taken on a load or store. This indicates whether the instruction
2676 * is accessing a 32-bit or 64-bit register. This logic is derived
2677 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2679 static bool ldst_iss_sf(int size
, bool sign
, bool ext
)
2684 * Signed loads are 64 bit results if we are not going to
2685 * do a zero-extend from 32 to 64 after the load.
2686 * (For a store, sign and ext are always false.)
2690 /* Unsigned loads/stores work at the specified size */
2691 return size
== MO_64
;
2695 static bool trans_STXR(DisasContext
*s
, arg_stxr
*a
)
2698 gen_check_sp_alignment(s
);
2701 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_STRL
);
2703 gen_store_exclusive(s
, a
->rs
, a
->rt
, a
->rt2
, a
->rn
, a
->sz
, false);
2707 static bool trans_LDXR(DisasContext
*s
, arg_stxr
*a
)
2710 gen_check_sp_alignment(s
);
2712 gen_load_exclusive(s
, a
->rt
, a
->rt2
, a
->rn
, a
->sz
, false);
2714 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_LDAQ
);
2719 static bool trans_STLR(DisasContext
*s
, arg_stlr
*a
)
2721 TCGv_i64 clean_addr
;
2723 bool iss_sf
= ldst_iss_sf(a
->sz
, false, false);
2726 * StoreLORelease is the same as Store-Release for QEMU, but
2727 * needs the feature-test.
2729 if (!a
->lasr
&& !dc_isar_feature(aa64_lor
, s
)) {
2732 /* Generate ISS for non-exclusive accesses including LASR. */
2734 gen_check_sp_alignment(s
);
2736 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_STRL
);
2737 memop
= check_ordered_align(s
, a
->rn
, 0, true, a
->sz
);
2738 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, a
->rn
),
2739 true, a
->rn
!= 31, memop
);
2740 do_gpr_st(s
, cpu_reg(s
, a
->rt
), clean_addr
, memop
, true, a
->rt
,
2745 static bool trans_LDAR(DisasContext
*s
, arg_stlr
*a
)
2747 TCGv_i64 clean_addr
;
2749 bool iss_sf
= ldst_iss_sf(a
->sz
, false, false);
2751 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2752 if (!a
->lasr
&& !dc_isar_feature(aa64_lor
, s
)) {
2755 /* Generate ISS for non-exclusive accesses including LASR. */
2757 gen_check_sp_alignment(s
);
2759 memop
= check_ordered_align(s
, a
->rn
, 0, false, a
->sz
);
2760 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, a
->rn
),
2761 false, a
->rn
!= 31, memop
);
2762 do_gpr_ld(s
, cpu_reg(s
, a
->rt
), clean_addr
, memop
, false, true,
2763 a
->rt
, iss_sf
, a
->lasr
);
2764 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_LDAQ
);
2768 static bool trans_STXP(DisasContext
*s
, arg_stxr
*a
)
2771 gen_check_sp_alignment(s
);
2774 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_STRL
);
2776 gen_store_exclusive(s
, a
->rs
, a
->rt
, a
->rt2
, a
->rn
, a
->sz
, true);
2780 static bool trans_LDXP(DisasContext
*s
, arg_stxr
*a
)
2783 gen_check_sp_alignment(s
);
2785 gen_load_exclusive(s
, a
->rt
, a
->rt2
, a
->rn
, a
->sz
, true);
2787 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_LDAQ
);
2792 static bool trans_CASP(DisasContext
*s
, arg_CASP
*a
)
2794 if (!dc_isar_feature(aa64_atomics
, s
)) {
2797 if (((a
->rt
| a
->rs
) & 1) != 0) {
2801 gen_compare_and_swap_pair(s
, a
->rs
, a
->rt
, a
->rn
, a
->sz
);
2805 static bool trans_CAS(DisasContext
*s
, arg_CAS
*a
)
2807 if (!dc_isar_feature(aa64_atomics
, s
)) {
2810 gen_compare_and_swap(s
, a
->rs
, a
->rt
, a
->rn
, a
->sz
);
2814 static bool trans_LD_lit(DisasContext
*s
, arg_ldlit
*a
)
2816 bool iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, false);
2817 TCGv_i64 tcg_rt
= cpu_reg(s
, a
->rt
);
2818 TCGv_i64 clean_addr
= tcg_temp_new_i64();
2819 MemOp memop
= finalize_memop(s
, a
->sz
+ a
->sign
* MO_SIGN
);
2821 gen_pc_plus_diff(s
, clean_addr
, a
->imm
);
2822 do_gpr_ld(s
, tcg_rt
, clean_addr
, memop
,
2823 false, true, a
->rt
, iss_sf
, false);
2827 static bool trans_LD_lit_v(DisasContext
*s
, arg_ldlit
*a
)
2829 /* Load register (literal), vector version */
2830 TCGv_i64 clean_addr
;
2833 if (!fp_access_check(s
)) {
2836 memop
= finalize_memop_asimd(s
, a
->sz
);
2837 clean_addr
= tcg_temp_new_i64();
2838 gen_pc_plus_diff(s
, clean_addr
, a
->imm
);
2839 do_fp_ld(s
, a
->rt
, clean_addr
, memop
);
2843 static void op_addr_ldstpair_pre(DisasContext
*s
, arg_ldstpair
*a
,
2844 TCGv_i64
*clean_addr
, TCGv_i64
*dirty_addr
,
2845 uint64_t offset
, bool is_store
, MemOp mop
)
2848 gen_check_sp_alignment(s
);
2851 *dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
2853 tcg_gen_addi_i64(*dirty_addr
, *dirty_addr
, offset
);
2856 *clean_addr
= gen_mte_checkN(s
, *dirty_addr
, is_store
,
2857 (a
->w
|| a
->rn
!= 31), 2 << a
->sz
, mop
);
2860 static void op_addr_ldstpair_post(DisasContext
*s
, arg_ldstpair
*a
,
2861 TCGv_i64 dirty_addr
, uint64_t offset
)
2865 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, offset
);
2867 tcg_gen_mov_i64(cpu_reg_sp(s
, a
->rn
), dirty_addr
);
2871 static bool trans_STP(DisasContext
*s
, arg_ldstpair
*a
)
2873 uint64_t offset
= a
->imm
<< a
->sz
;
2874 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
, tcg_rt2
;
2875 MemOp mop
= finalize_memop(s
, a
->sz
);
2877 op_addr_ldstpair_pre(s
, a
, &clean_addr
, &dirty_addr
, offset
, true, mop
);
2878 tcg_rt
= cpu_reg(s
, a
->rt
);
2879 tcg_rt2
= cpu_reg(s
, a
->rt2
);
2881 * We built mop above for the single logical access -- rebuild it
2882 * now for the paired operation.
2884 * With LSE2, non-sign-extending pairs are treated atomically if
2885 * aligned, and if unaligned one of the pair will be completely
2886 * within a 16-byte block and that element will be atomic.
2887 * Otherwise each element is separately atomic.
2888 * In all cases, issue one operation with the correct atomicity.
2892 mop
|= (a
->sz
== 2 ? MO_ALIGN_4
: MO_ALIGN_8
);
2894 mop
= finalize_memop_pair(s
, mop
);
2896 TCGv_i64 tmp
= tcg_temp_new_i64();
2898 if (s
->be_data
== MO_LE
) {
2899 tcg_gen_concat32_i64(tmp
, tcg_rt
, tcg_rt2
);
2901 tcg_gen_concat32_i64(tmp
, tcg_rt2
, tcg_rt
);
2903 tcg_gen_qemu_st_i64(tmp
, clean_addr
, get_mem_index(s
), mop
);
2905 TCGv_i128 tmp
= tcg_temp_new_i128();
2907 if (s
->be_data
== MO_LE
) {
2908 tcg_gen_concat_i64_i128(tmp
, tcg_rt
, tcg_rt2
);
2910 tcg_gen_concat_i64_i128(tmp
, tcg_rt2
, tcg_rt
);
2912 tcg_gen_qemu_st_i128(tmp
, clean_addr
, get_mem_index(s
), mop
);
2914 op_addr_ldstpair_post(s
, a
, dirty_addr
, offset
);
2918 static bool trans_LDP(DisasContext
*s
, arg_ldstpair
*a
)
2920 uint64_t offset
= a
->imm
<< a
->sz
;
2921 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
, tcg_rt2
;
2922 MemOp mop
= finalize_memop(s
, a
->sz
);
2924 op_addr_ldstpair_pre(s
, a
, &clean_addr
, &dirty_addr
, offset
, false, mop
);
2925 tcg_rt
= cpu_reg(s
, a
->rt
);
2926 tcg_rt2
= cpu_reg(s
, a
->rt2
);
2929 * We built mop above for the single logical access -- rebuild it
2930 * now for the paired operation.
2932 * With LSE2, non-sign-extending pairs are treated atomically if
2933 * aligned, and if unaligned one of the pair will be completely
2934 * within a 16-byte block and that element will be atomic.
2935 * Otherwise each element is separately atomic.
2936 * In all cases, issue one operation with the correct atomicity.
2938 * This treats sign-extending loads like zero-extending loads,
2939 * since that reuses the most code below.
2943 mop
|= (a
->sz
== 2 ? MO_ALIGN_4
: MO_ALIGN_8
);
2945 mop
= finalize_memop_pair(s
, mop
);
2947 int o2
= s
->be_data
== MO_LE
? 32 : 0;
2950 tcg_gen_qemu_ld_i64(tcg_rt
, clean_addr
, get_mem_index(s
), mop
);
2952 tcg_gen_sextract_i64(tcg_rt2
, tcg_rt
, o2
, 32);
2953 tcg_gen_sextract_i64(tcg_rt
, tcg_rt
, o1
, 32);
2955 tcg_gen_extract_i64(tcg_rt2
, tcg_rt
, o2
, 32);
2956 tcg_gen_extract_i64(tcg_rt
, tcg_rt
, o1
, 32);
2959 TCGv_i128 tmp
= tcg_temp_new_i128();
2961 tcg_gen_qemu_ld_i128(tmp
, clean_addr
, get_mem_index(s
), mop
);
2962 if (s
->be_data
== MO_LE
) {
2963 tcg_gen_extr_i128_i64(tcg_rt
, tcg_rt2
, tmp
);
2965 tcg_gen_extr_i128_i64(tcg_rt2
, tcg_rt
, tmp
);
2968 op_addr_ldstpair_post(s
, a
, dirty_addr
, offset
);
2972 static bool trans_STP_v(DisasContext
*s
, arg_ldstpair
*a
)
2974 uint64_t offset
= a
->imm
<< a
->sz
;
2975 TCGv_i64 clean_addr
, dirty_addr
;
2978 if (!fp_access_check(s
)) {
2982 /* LSE2 does not merge FP pairs; leave these as separate operations. */
2983 mop
= finalize_memop_asimd(s
, a
->sz
);
2984 op_addr_ldstpair_pre(s
, a
, &clean_addr
, &dirty_addr
, offset
, true, mop
);
2985 do_fp_st(s
, a
->rt
, clean_addr
, mop
);
2986 tcg_gen_addi_i64(clean_addr
, clean_addr
, 1 << a
->sz
);
2987 do_fp_st(s
, a
->rt2
, clean_addr
, mop
);
2988 op_addr_ldstpair_post(s
, a
, dirty_addr
, offset
);
2992 static bool trans_LDP_v(DisasContext
*s
, arg_ldstpair
*a
)
2994 uint64_t offset
= a
->imm
<< a
->sz
;
2995 TCGv_i64 clean_addr
, dirty_addr
;
2998 if (!fp_access_check(s
)) {
3002 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3003 mop
= finalize_memop_asimd(s
, a
->sz
);
3004 op_addr_ldstpair_pre(s
, a
, &clean_addr
, &dirty_addr
, offset
, false, mop
);
3005 do_fp_ld(s
, a
->rt
, clean_addr
, mop
);
3006 tcg_gen_addi_i64(clean_addr
, clean_addr
, 1 << a
->sz
);
3007 do_fp_ld(s
, a
->rt2
, clean_addr
, mop
);
3008 op_addr_ldstpair_post(s
, a
, dirty_addr
, offset
);
3012 static bool trans_STGP(DisasContext
*s
, arg_ldstpair
*a
)
3014 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
, tcg_rt2
;
3015 uint64_t offset
= a
->imm
<< LOG2_TAG_GRANULE
;
3019 /* STGP only comes in one size. */
3020 tcg_debug_assert(a
->sz
== MO_64
);
3022 if (!dc_isar_feature(aa64_mte_insn_reg
, s
)) {
3027 gen_check_sp_alignment(s
);
3030 dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3032 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, offset
);
3035 clean_addr
= clean_data_tbi(s
, dirty_addr
);
3036 tcg_rt
= cpu_reg(s
, a
->rt
);
3037 tcg_rt2
= cpu_reg(s
, a
->rt2
);
3040 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3041 * and one tag operation. We implement it as one single aligned 16-byte
3042 * memory operation for convenience. Note that the alignment ensures
3043 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3045 mop
= finalize_memop_atom(s
, MO_128
| MO_ALIGN
, MO_ATOM_IFALIGN_PAIR
);
3047 tmp
= tcg_temp_new_i128();
3048 if (s
->be_data
== MO_LE
) {
3049 tcg_gen_concat_i64_i128(tmp
, tcg_rt
, tcg_rt2
);
3051 tcg_gen_concat_i64_i128(tmp
, tcg_rt2
, tcg_rt
);
3053 tcg_gen_qemu_st_i128(tmp
, clean_addr
, get_mem_index(s
), mop
);
3055 /* Perform the tag store, if tag access enabled. */
3057 if (tb_cflags(s
->base
.tb
) & CF_PARALLEL
) {
3058 gen_helper_stg_parallel(cpu_env
, dirty_addr
, dirty_addr
);
3060 gen_helper_stg(cpu_env
, dirty_addr
, dirty_addr
);
3064 op_addr_ldstpair_post(s
, a
, dirty_addr
, offset
);
3068 static void op_addr_ldst_imm_pre(DisasContext
*s
, arg_ldst_imm
*a
,
3069 TCGv_i64
*clean_addr
, TCGv_i64
*dirty_addr
,
3070 uint64_t offset
, bool is_store
, MemOp mop
)
3075 gen_check_sp_alignment(s
);
3078 *dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3080 tcg_gen_addi_i64(*dirty_addr
, *dirty_addr
, offset
);
3082 memidx
= a
->unpriv
? get_a64_user_mem_index(s
) : get_mem_index(s
);
3083 *clean_addr
= gen_mte_check1_mmuidx(s
, *dirty_addr
, is_store
,
3084 a
->w
|| a
->rn
!= 31,
3085 mop
, a
->unpriv
, memidx
);
3088 static void op_addr_ldst_imm_post(DisasContext
*s
, arg_ldst_imm
*a
,
3089 TCGv_i64 dirty_addr
, uint64_t offset
)
3093 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, offset
);
3095 tcg_gen_mov_i64(cpu_reg_sp(s
, a
->rn
), dirty_addr
);
3099 static bool trans_STR_i(DisasContext
*s
, arg_ldst_imm
*a
)
3101 bool iss_sf
, iss_valid
= !a
->w
;
3102 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
;
3103 int memidx
= a
->unpriv
? get_a64_user_mem_index(s
) : get_mem_index(s
);
3104 MemOp mop
= finalize_memop(s
, a
->sz
+ a
->sign
* MO_SIGN
);
3106 op_addr_ldst_imm_pre(s
, a
, &clean_addr
, &dirty_addr
, a
->imm
, true, mop
);
3108 tcg_rt
= cpu_reg(s
, a
->rt
);
3109 iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3111 do_gpr_st_memidx(s
, tcg_rt
, clean_addr
, mop
, memidx
,
3112 iss_valid
, a
->rt
, iss_sf
, false);
3113 op_addr_ldst_imm_post(s
, a
, dirty_addr
, a
->imm
);
3117 static bool trans_LDR_i(DisasContext
*s
, arg_ldst_imm
*a
)
3119 bool iss_sf
, iss_valid
= !a
->w
;
3120 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
;
3121 int memidx
= a
->unpriv
? get_a64_user_mem_index(s
) : get_mem_index(s
);
3122 MemOp mop
= finalize_memop(s
, a
->sz
+ a
->sign
* MO_SIGN
);
3124 op_addr_ldst_imm_pre(s
, a
, &clean_addr
, &dirty_addr
, a
->imm
, false, mop
);
3126 tcg_rt
= cpu_reg(s
, a
->rt
);
3127 iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3129 do_gpr_ld_memidx(s
, tcg_rt
, clean_addr
, mop
,
3130 a
->ext
, memidx
, iss_valid
, a
->rt
, iss_sf
, false);
3131 op_addr_ldst_imm_post(s
, a
, dirty_addr
, a
->imm
);
3135 static bool trans_STR_v_i(DisasContext
*s
, arg_ldst_imm
*a
)
3137 TCGv_i64 clean_addr
, dirty_addr
;
3140 if (!fp_access_check(s
)) {
3143 mop
= finalize_memop_asimd(s
, a
->sz
);
3144 op_addr_ldst_imm_pre(s
, a
, &clean_addr
, &dirty_addr
, a
->imm
, true, mop
);
3145 do_fp_st(s
, a
->rt
, clean_addr
, mop
);
3146 op_addr_ldst_imm_post(s
, a
, dirty_addr
, a
->imm
);
3150 static bool trans_LDR_v_i(DisasContext
*s
, arg_ldst_imm
*a
)
3152 TCGv_i64 clean_addr
, dirty_addr
;
3155 if (!fp_access_check(s
)) {
3158 mop
= finalize_memop_asimd(s
, a
->sz
);
3159 op_addr_ldst_imm_pre(s
, a
, &clean_addr
, &dirty_addr
, a
->imm
, false, mop
);
3160 do_fp_ld(s
, a
->rt
, clean_addr
, mop
);
3161 op_addr_ldst_imm_post(s
, a
, dirty_addr
, a
->imm
);
3165 static void op_addr_ldst_pre(DisasContext
*s
, arg_ldst
*a
,
3166 TCGv_i64
*clean_addr
, TCGv_i64
*dirty_addr
,
3167 bool is_store
, MemOp memop
)
3172 gen_check_sp_alignment(s
);
3174 *dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3176 tcg_rm
= read_cpu_reg(s
, a
->rm
, 1);
3177 ext_and_shift_reg(tcg_rm
, tcg_rm
, a
->opt
, a
->s
? a
->sz
: 0);
3179 tcg_gen_add_i64(*dirty_addr
, *dirty_addr
, tcg_rm
);
3180 *clean_addr
= gen_mte_check1(s
, *dirty_addr
, is_store
, true, memop
);
3183 static bool trans_LDR(DisasContext
*s
, arg_ldst
*a
)
3185 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
;
3186 bool iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3189 if (extract32(a
->opt
, 1, 1) == 0) {
3193 memop
= finalize_memop(s
, a
->sz
+ a
->sign
* MO_SIGN
);
3194 op_addr_ldst_pre(s
, a
, &clean_addr
, &dirty_addr
, false, memop
);
3195 tcg_rt
= cpu_reg(s
, a
->rt
);
3196 do_gpr_ld(s
, tcg_rt
, clean_addr
, memop
,
3197 a
->ext
, true, a
->rt
, iss_sf
, false);
3201 static bool trans_STR(DisasContext
*s
, arg_ldst
*a
)
3203 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
;
3204 bool iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3207 if (extract32(a
->opt
, 1, 1) == 0) {
3211 memop
= finalize_memop(s
, a
->sz
);
3212 op_addr_ldst_pre(s
, a
, &clean_addr
, &dirty_addr
, true, memop
);
3213 tcg_rt
= cpu_reg(s
, a
->rt
);
3214 do_gpr_st(s
, tcg_rt
, clean_addr
, memop
, true, a
->rt
, iss_sf
, false);
3218 static bool trans_LDR_v(DisasContext
*s
, arg_ldst
*a
)
3220 TCGv_i64 clean_addr
, dirty_addr
;
3223 if (extract32(a
->opt
, 1, 1) == 0) {
3227 if (!fp_access_check(s
)) {
3231 memop
= finalize_memop_asimd(s
, a
->sz
);
3232 op_addr_ldst_pre(s
, a
, &clean_addr
, &dirty_addr
, false, memop
);
3233 do_fp_ld(s
, a
->rt
, clean_addr
, memop
);
3237 static bool trans_STR_v(DisasContext
*s
, arg_ldst
*a
)
3239 TCGv_i64 clean_addr
, dirty_addr
;
3242 if (extract32(a
->opt
, 1, 1) == 0) {
3246 if (!fp_access_check(s
)) {
3250 memop
= finalize_memop_asimd(s
, a
->sz
);
3251 op_addr_ldst_pre(s
, a
, &clean_addr
, &dirty_addr
, true, memop
);
3252 do_fp_st(s
, a
->rt
, clean_addr
, memop
);
3257 static bool do_atomic_ld(DisasContext
*s
, arg_atomic
*a
, AtomicThreeOpFn
*fn
,
3258 int sign
, bool invert
)
3260 MemOp mop
= a
->sz
| sign
;
3261 TCGv_i64 clean_addr
, tcg_rs
, tcg_rt
;
3264 gen_check_sp_alignment(s
);
3266 mop
= check_atomic_align(s
, a
->rn
, mop
);
3267 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, a
->rn
), false,
3269 tcg_rs
= read_cpu_reg(s
, a
->rs
, true);
3270 tcg_rt
= cpu_reg(s
, a
->rt
);
3272 tcg_gen_not_i64(tcg_rs
, tcg_rs
);
3275 * The tcg atomic primitives are all full barriers. Therefore we
3276 * can ignore the Acquire and Release bits of this instruction.
3278 fn(tcg_rt
, clean_addr
, tcg_rs
, get_mem_index(s
), mop
);
3280 if (mop
& MO_SIGN
) {
3283 tcg_gen_ext8u_i64(tcg_rt
, tcg_rt
);
3286 tcg_gen_ext16u_i64(tcg_rt
, tcg_rt
);
3289 tcg_gen_ext32u_i64(tcg_rt
, tcg_rt
);
3294 g_assert_not_reached();
3300 TRANS_FEAT(LDADD
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_add_i64
, 0, false)
3301 TRANS_FEAT(LDCLR
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_and_i64
, 0, true)
3302 TRANS_FEAT(LDEOR
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_xor_i64
, 0, false)
3303 TRANS_FEAT(LDSET
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_or_i64
, 0, false)
3304 TRANS_FEAT(LDSMAX
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_smax_i64
, MO_SIGN
, false)
3305 TRANS_FEAT(LDSMIN
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_smin_i64
, MO_SIGN
, false)
3306 TRANS_FEAT(LDUMAX
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_umax_i64
, 0, false)
3307 TRANS_FEAT(LDUMIN
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_fetch_umin_i64
, 0, false)
3308 TRANS_FEAT(SWP
, aa64_atomics
, do_atomic_ld
, a
, tcg_gen_atomic_xchg_i64
, 0, false)
3310 static bool trans_LDAPR(DisasContext
*s
, arg_LDAPR
*a
)
3312 bool iss_sf
= ldst_iss_sf(a
->sz
, false, false);
3313 TCGv_i64 clean_addr
;
3316 if (!dc_isar_feature(aa64_atomics
, s
) ||
3317 !dc_isar_feature(aa64_rcpc_8_3
, s
)) {
3321 gen_check_sp_alignment(s
);
3323 mop
= check_atomic_align(s
, a
->rn
, a
->sz
);
3324 clean_addr
= gen_mte_check1(s
, cpu_reg_sp(s
, a
->rn
), false,
3327 * LDAPR* are a special case because they are a simple load, not a
3328 * fetch-and-do-something op.
3329 * The architectural consistency requirements here are weaker than
3330 * full load-acquire (we only need "load-acquire processor consistent"),
3331 * but we choose to implement them as full LDAQ.
3333 do_gpr_ld(s
, cpu_reg(s
, a
->rt
), clean_addr
, mop
, false,
3334 true, a
->rt
, iss_sf
, true);
3335 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_LDAQ
);
3339 static bool trans_LDRA(DisasContext
*s
, arg_LDRA
*a
)
3341 TCGv_i64 clean_addr
, dirty_addr
, tcg_rt
;
3344 /* Load with pointer authentication */
3345 if (!dc_isar_feature(aa64_pauth
, s
)) {
3350 gen_check_sp_alignment(s
);
3352 dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3354 if (s
->pauth_active
) {
3356 gen_helper_autda_combined(dirty_addr
, cpu_env
, dirty_addr
,
3357 tcg_constant_i64(0));
3359 gen_helper_autdb_combined(dirty_addr
, cpu_env
, dirty_addr
,
3360 tcg_constant_i64(0));
3364 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, a
->imm
);
3366 memop
= finalize_memop(s
, MO_64
);
3368 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3369 clean_addr
= gen_mte_check1(s
, dirty_addr
, false,
3370 a
->w
|| a
->rn
!= 31, memop
);
3372 tcg_rt
= cpu_reg(s
, a
->rt
);
3373 do_gpr_ld(s
, tcg_rt
, clean_addr
, memop
,
3374 /* extend */ false, /* iss_valid */ !a
->w
,
3375 /* iss_srt */ a
->rt
, /* iss_sf */ true, /* iss_ar */ false);
3378 tcg_gen_mov_i64(cpu_reg_sp(s
, a
->rn
), dirty_addr
);
3383 static bool trans_LDAPR_i(DisasContext
*s
, arg_ldapr_stlr_i
*a
)
3385 TCGv_i64 clean_addr
, dirty_addr
;
3386 MemOp mop
= a
->sz
| (a
->sign
? MO_SIGN
: 0);
3387 bool iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3389 if (!dc_isar_feature(aa64_rcpc_8_4
, s
)) {
3394 gen_check_sp_alignment(s
);
3397 mop
= check_ordered_align(s
, a
->rn
, a
->imm
, false, mop
);
3398 dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3399 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, a
->imm
);
3400 clean_addr
= clean_data_tbi(s
, dirty_addr
);
3403 * Load-AcquirePC semantics; we implement as the slightly more
3404 * restrictive Load-Acquire.
3406 do_gpr_ld(s
, cpu_reg(s
, a
->rt
), clean_addr
, mop
, a
->ext
, true,
3407 a
->rt
, iss_sf
, true);
3408 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_LDAQ
);
3412 static bool trans_STLR_i(DisasContext
*s
, arg_ldapr_stlr_i
*a
)
3414 TCGv_i64 clean_addr
, dirty_addr
;
3416 bool iss_sf
= ldst_iss_sf(a
->sz
, a
->sign
, a
->ext
);
3418 if (!dc_isar_feature(aa64_rcpc_8_4
, s
)) {
3422 /* TODO: ARMv8.4-LSE SCTLR.nAA */
3425 gen_check_sp_alignment(s
);
3428 mop
= check_ordered_align(s
, a
->rn
, a
->imm
, true, mop
);
3429 dirty_addr
= read_cpu_reg_sp(s
, a
->rn
, 1);
3430 tcg_gen_addi_i64(dirty_addr
, dirty_addr
, a
->imm
);
3431 clean_addr
= clean_data_tbi(s
, dirty_addr
);
3433 /* Store-Release semantics */
3434 tcg_gen_mb(TCG_MO_ALL
| TCG_BAR_STRL
);
3435 do_gpr_st(s
, cpu_reg(s
, a
->rt
), clean_addr
, mop
, true, a
->rt
, iss_sf
, true);
3439 static bool trans_LD_mult(DisasContext
*s
, arg_ldst_mult
*a
)
3441 TCGv_i64 clean_addr
, tcg_rn
, tcg_ebytes
;
3442 MemOp endian
, align
, mop
;
3444 int total
; /* total bytes */
3445 int elements
; /* elements per vector */
3449 if (!a
->p
&& a
->rm
!= 0) {
3450 /* For non-postindexed accesses the Rm field must be 0 */
3453 if (size
== 3 && !a
->q
&& a
->selem
!= 1) {
3456 if (!fp_access_check(s
)) {
3461 gen_check_sp_alignment(s
);
3464 /* For our purposes, bytes are always little-endian. */
3465 endian
= s
->be_data
;
3470 total
= a
->rpt
* a
->selem
* (a
->q
? 16 : 8);
3471 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
3474 * Issue the MTE check vs the logical repeat count, before we
3475 * promote consecutive little-endian elements below.
3477 clean_addr
= gen_mte_checkN(s
, tcg_rn
, false, a
->p
|| a
->rn
!= 31, total
,
3478 finalize_memop_asimd(s
, size
));
3481 * Consecutive little-endian elements from a single register
3482 * can be promoted to a larger little-endian operation.
3485 if (a
->selem
== 1 && endian
== MO_LE
) {
3486 align
= pow2_align(size
);
3489 if (!s
->align_mem
) {
3492 mop
= endian
| size
| align
;
3494 elements
= (a
->q
? 16 : 8) >> size
;
3495 tcg_ebytes
= tcg_constant_i64(1 << size
);
3496 for (r
= 0; r
< a
->rpt
; r
++) {
3498 for (e
= 0; e
< elements
; e
++) {
3500 for (xs
= 0; xs
< a
->selem
; xs
++) {
3501 int tt
= (a
->rt
+ r
+ xs
) % 32;
3502 do_vec_ld(s
, tt
, e
, clean_addr
, mop
);
3503 tcg_gen_add_i64(clean_addr
, clean_addr
, tcg_ebytes
);
3509 * For non-quad operations, setting a slice of the low 64 bits of
3510 * the register clears the high 64 bits (in the ARM ARM pseudocode
3511 * this is implicit in the fact that 'rval' is a 64 bit wide
3512 * variable). For quad operations, we might still need to zero
3513 * the high bits of SVE.
3515 for (r
= 0; r
< a
->rpt
* a
->selem
; r
++) {
3516 int tt
= (a
->rt
+ r
) % 32;
3517 clear_vec_high(s
, a
->q
, tt
);
3522 tcg_gen_addi_i64(tcg_rn
, tcg_rn
, total
);
3524 tcg_gen_add_i64(tcg_rn
, tcg_rn
, cpu_reg(s
, a
->rm
));
3530 static bool trans_ST_mult(DisasContext
*s
, arg_ldst_mult
*a
)
3532 TCGv_i64 clean_addr
, tcg_rn
, tcg_ebytes
;
3533 MemOp endian
, align
, mop
;
3535 int total
; /* total bytes */
3536 int elements
; /* elements per vector */
3540 if (!a
->p
&& a
->rm
!= 0) {
3541 /* For non-postindexed accesses the Rm field must be 0 */
3544 if (size
== 3 && !a
->q
&& a
->selem
!= 1) {
3547 if (!fp_access_check(s
)) {
3552 gen_check_sp_alignment(s
);
3555 /* For our purposes, bytes are always little-endian. */
3556 endian
= s
->be_data
;
3561 total
= a
->rpt
* a
->selem
* (a
->q
? 16 : 8);
3562 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
3565 * Issue the MTE check vs the logical repeat count, before we
3566 * promote consecutive little-endian elements below.
3568 clean_addr
= gen_mte_checkN(s
, tcg_rn
, true, a
->p
|| a
->rn
!= 31, total
,
3569 finalize_memop_asimd(s
, size
));
3572 * Consecutive little-endian elements from a single register
3573 * can be promoted to a larger little-endian operation.
3576 if (a
->selem
== 1 && endian
== MO_LE
) {
3577 align
= pow2_align(size
);
3580 if (!s
->align_mem
) {
3583 mop
= endian
| size
| align
;
3585 elements
= (a
->q
? 16 : 8) >> size
;
3586 tcg_ebytes
= tcg_constant_i64(1 << size
);
3587 for (r
= 0; r
< a
->rpt
; r
++) {
3589 for (e
= 0; e
< elements
; e
++) {
3591 for (xs
= 0; xs
< a
->selem
; xs
++) {
3592 int tt
= (a
->rt
+ r
+ xs
) % 32;
3593 do_vec_st(s
, tt
, e
, clean_addr
, mop
);
3594 tcg_gen_add_i64(clean_addr
, clean_addr
, tcg_ebytes
);
3601 tcg_gen_addi_i64(tcg_rn
, tcg_rn
, total
);
3603 tcg_gen_add_i64(tcg_rn
, tcg_rn
, cpu_reg(s
, a
->rm
));
3609 static bool trans_ST_single(DisasContext
*s
, arg_ldst_single
*a
)
3612 TCGv_i64 clean_addr
, tcg_rn
, tcg_ebytes
;
3615 if (!a
->p
&& a
->rm
!= 0) {
3618 if (!fp_access_check(s
)) {
3623 gen_check_sp_alignment(s
);
3626 total
= a
->selem
<< a
->scale
;
3627 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
3629 mop
= finalize_memop_asimd(s
, a
->scale
);
3630 clean_addr
= gen_mte_checkN(s
, tcg_rn
, true, a
->p
|| a
->rn
!= 31,
3633 tcg_ebytes
= tcg_constant_i64(1 << a
->scale
);
3634 for (xs
= 0, rt
= a
->rt
; xs
< a
->selem
; xs
++, rt
= (rt
+ 1) % 32) {
3635 do_vec_st(s
, rt
, a
->index
, clean_addr
, mop
);
3636 tcg_gen_add_i64(clean_addr
, clean_addr
, tcg_ebytes
);
3641 tcg_gen_addi_i64(tcg_rn
, tcg_rn
, total
);
3643 tcg_gen_add_i64(tcg_rn
, tcg_rn
, cpu_reg(s
, a
->rm
));
3649 static bool trans_LD_single(DisasContext
*s
, arg_ldst_single
*a
)
3652 TCGv_i64 clean_addr
, tcg_rn
, tcg_ebytes
;
3655 if (!a
->p
&& a
->rm
!= 0) {
3658 if (!fp_access_check(s
)) {
3663 gen_check_sp_alignment(s
);
3666 total
= a
->selem
<< a
->scale
;
3667 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
3669 mop
= finalize_memop_asimd(s
, a
->scale
);
3670 clean_addr
= gen_mte_checkN(s
, tcg_rn
, false, a
->p
|| a
->rn
!= 31,
3673 tcg_ebytes
= tcg_constant_i64(1 << a
->scale
);
3674 for (xs
= 0, rt
= a
->rt
; xs
< a
->selem
; xs
++, rt
= (rt
+ 1) % 32) {
3675 do_vec_ld(s
, rt
, a
->index
, clean_addr
, mop
);
3676 tcg_gen_add_i64(clean_addr
, clean_addr
, tcg_ebytes
);
3681 tcg_gen_addi_i64(tcg_rn
, tcg_rn
, total
);
3683 tcg_gen_add_i64(tcg_rn
, tcg_rn
, cpu_reg(s
, a
->rm
));
3689 static bool trans_LD_single_repl(DisasContext
*s
, arg_LD_single_repl
*a
)
3692 TCGv_i64 clean_addr
, tcg_rn
, tcg_ebytes
;
3695 if (!a
->p
&& a
->rm
!= 0) {
3698 if (!fp_access_check(s
)) {
3703 gen_check_sp_alignment(s
);
3706 total
= a
->selem
<< a
->scale
;
3707 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
3709 mop
= finalize_memop_asimd(s
, a
->scale
);
3710 clean_addr
= gen_mte_checkN(s
, tcg_rn
, false, a
->p
|| a
->rn
!= 31,
3713 tcg_ebytes
= tcg_constant_i64(1 << a
->scale
);
3714 for (xs
= 0, rt
= a
->rt
; xs
< a
->selem
; xs
++, rt
= (rt
+ 1) % 32) {
3715 /* Load and replicate to all elements */
3716 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
3718 tcg_gen_qemu_ld_i64(tcg_tmp
, clean_addr
, get_mem_index(s
), mop
);
3719 tcg_gen_gvec_dup_i64(a
->scale
, vec_full_reg_offset(s
, rt
),
3720 (a
->q
+ 1) * 8, vec_full_reg_size(s
), tcg_tmp
);
3721 tcg_gen_add_i64(clean_addr
, clean_addr
, tcg_ebytes
);
3726 tcg_gen_addi_i64(tcg_rn
, tcg_rn
, total
);
3728 tcg_gen_add_i64(tcg_rn
, tcg_rn
, cpu_reg(s
, a
->rm
));
3734 static bool trans_STZGM(DisasContext
*s
, arg_ldst_tag
*a
)
3736 TCGv_i64 addr
, clean_addr
, tcg_rt
;
3737 int size
= 4 << s
->dcz_blocksize
;
3739 if (!dc_isar_feature(aa64_mte
, s
)) {
3742 if (s
->current_el
== 0) {
3747 gen_check_sp_alignment(s
);
3750 addr
= read_cpu_reg_sp(s
, a
->rn
, true);
3751 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3752 tcg_rt
= cpu_reg(s
, a
->rt
);
3755 gen_helper_stzgm_tags(cpu_env
, addr
, tcg_rt
);
3758 * The non-tags portion of STZGM is mostly like DC_ZVA,
3759 * except the alignment happens before the access.
3761 clean_addr
= clean_data_tbi(s
, addr
);
3762 tcg_gen_andi_i64(clean_addr
, clean_addr
, -size
);
3763 gen_helper_dc_zva(cpu_env
, clean_addr
);
3767 static bool trans_STGM(DisasContext
*s
, arg_ldst_tag
*a
)
3769 TCGv_i64 addr
, clean_addr
, tcg_rt
;
3771 if (!dc_isar_feature(aa64_mte
, s
)) {
3774 if (s
->current_el
== 0) {
3779 gen_check_sp_alignment(s
);
3782 addr
= read_cpu_reg_sp(s
, a
->rn
, true);
3783 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3784 tcg_rt
= cpu_reg(s
, a
->rt
);
3787 gen_helper_stgm(cpu_env
, addr
, tcg_rt
);
3789 MMUAccessType acc
= MMU_DATA_STORE
;
3790 int size
= 4 << s
->gm_blocksize
;
3792 clean_addr
= clean_data_tbi(s
, addr
);
3793 tcg_gen_andi_i64(clean_addr
, clean_addr
, -size
);
3794 gen_probe_access(s
, clean_addr
, acc
, size
);
3799 static bool trans_LDGM(DisasContext
*s
, arg_ldst_tag
*a
)
3801 TCGv_i64 addr
, clean_addr
, tcg_rt
;
3803 if (!dc_isar_feature(aa64_mte
, s
)) {
3806 if (s
->current_el
== 0) {
3811 gen_check_sp_alignment(s
);
3814 addr
= read_cpu_reg_sp(s
, a
->rn
, true);
3815 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3816 tcg_rt
= cpu_reg(s
, a
->rt
);
3819 gen_helper_ldgm(tcg_rt
, cpu_env
, addr
);
3821 MMUAccessType acc
= MMU_DATA_LOAD
;
3822 int size
= 4 << s
->gm_blocksize
;
3824 clean_addr
= clean_data_tbi(s
, addr
);
3825 tcg_gen_andi_i64(clean_addr
, clean_addr
, -size
);
3826 gen_probe_access(s
, clean_addr
, acc
, size
);
3827 /* The result tags are zeros. */
3828 tcg_gen_movi_i64(tcg_rt
, 0);
3833 static bool trans_LDG(DisasContext
*s
, arg_ldst_tag
*a
)
3835 TCGv_i64 addr
, clean_addr
, tcg_rt
;
3837 if (!dc_isar_feature(aa64_mte_insn_reg
, s
)) {
3842 gen_check_sp_alignment(s
);
3845 addr
= read_cpu_reg_sp(s
, a
->rn
, true);
3847 /* pre-index or signed offset */
3848 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3851 tcg_gen_andi_i64(addr
, addr
, -TAG_GRANULE
);
3852 tcg_rt
= cpu_reg(s
, a
->rt
);
3854 gen_helper_ldg(tcg_rt
, cpu_env
, addr
, tcg_rt
);
3857 * Tag access disabled: we must check for aborts on the load
3858 * load from [rn+offset], and then insert a 0 tag into rt.
3860 clean_addr
= clean_data_tbi(s
, addr
);
3861 gen_probe_access(s
, clean_addr
, MMU_DATA_LOAD
, MO_8
);
3862 gen_address_with_allocation_tag0(tcg_rt
, tcg_rt
);
3866 /* pre-index or post-index */
3869 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3871 tcg_gen_mov_i64(cpu_reg_sp(s
, a
->rn
), addr
);
3876 static bool do_STG(DisasContext
*s
, arg_ldst_tag
*a
, bool is_zero
, bool is_pair
)
3878 TCGv_i64 addr
, tcg_rt
;
3881 gen_check_sp_alignment(s
);
3884 addr
= read_cpu_reg_sp(s
, a
->rn
, true);
3886 /* pre-index or signed offset */
3887 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3889 tcg_rt
= cpu_reg_sp(s
, a
->rt
);
3892 * For STG and ST2G, we need to check alignment and probe memory.
3893 * TODO: For STZG and STZ2G, we could rely on the stores below,
3894 * at least for system mode; user-only won't enforce alignment.
3897 gen_helper_st2g_stub(cpu_env
, addr
);
3899 gen_helper_stg_stub(cpu_env
, addr
);
3901 } else if (tb_cflags(s
->base
.tb
) & CF_PARALLEL
) {
3903 gen_helper_st2g_parallel(cpu_env
, addr
, tcg_rt
);
3905 gen_helper_stg_parallel(cpu_env
, addr
, tcg_rt
);
3909 gen_helper_st2g(cpu_env
, addr
, tcg_rt
);
3911 gen_helper_stg(cpu_env
, addr
, tcg_rt
);
3916 TCGv_i64 clean_addr
= clean_data_tbi(s
, addr
);
3917 TCGv_i64 zero64
= tcg_constant_i64(0);
3918 TCGv_i128 zero128
= tcg_temp_new_i128();
3919 int mem_index
= get_mem_index(s
);
3920 MemOp mop
= finalize_memop(s
, MO_128
| MO_ALIGN
);
3922 tcg_gen_concat_i64_i128(zero128
, zero64
, zero64
);
3924 /* This is 1 or 2 atomic 16-byte operations. */
3925 tcg_gen_qemu_st_i128(zero128
, clean_addr
, mem_index
, mop
);
3927 tcg_gen_addi_i64(clean_addr
, clean_addr
, 16);
3928 tcg_gen_qemu_st_i128(zero128
, clean_addr
, mem_index
, mop
);
3933 /* pre-index or post-index */
3936 tcg_gen_addi_i64(addr
, addr
, a
->imm
);
3938 tcg_gen_mov_i64(cpu_reg_sp(s
, a
->rn
), addr
);
3943 TRANS_FEAT(STG
, aa64_mte_insn_reg
, do_STG
, a
, false, false)
3944 TRANS_FEAT(STZG
, aa64_mte_insn_reg
, do_STG
, a
, true, false)
3945 TRANS_FEAT(ST2G
, aa64_mte_insn_reg
, do_STG
, a
, false, true)
3946 TRANS_FEAT(STZ2G
, aa64_mte_insn_reg
, do_STG
, a
, true, true)
3948 typedef void ArithTwoOp(TCGv_i64
, TCGv_i64
, TCGv_i64
);
3950 static bool gen_rri(DisasContext
*s
, arg_rri_sf
*a
,
3951 bool rd_sp
, bool rn_sp
, ArithTwoOp
*fn
)
3953 TCGv_i64 tcg_rn
= rn_sp
? cpu_reg_sp(s
, a
->rn
) : cpu_reg(s
, a
->rn
);
3954 TCGv_i64 tcg_rd
= rd_sp
? cpu_reg_sp(s
, a
->rd
) : cpu_reg(s
, a
->rd
);
3955 TCGv_i64 tcg_imm
= tcg_constant_i64(a
->imm
);
3957 fn(tcg_rd
, tcg_rn
, tcg_imm
);
3959 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
3965 * PC-rel. addressing
3968 static bool trans_ADR(DisasContext
*s
, arg_ri
*a
)
3970 gen_pc_plus_diff(s
, cpu_reg(s
, a
->rd
), a
->imm
);
3974 static bool trans_ADRP(DisasContext
*s
, arg_ri
*a
)
3976 int64_t offset
= (int64_t)a
->imm
<< 12;
3978 /* The page offset is ok for CF_PCREL. */
3979 offset
-= s
->pc_curr
& 0xfff;
3980 gen_pc_plus_diff(s
, cpu_reg(s
, a
->rd
), offset
);
3985 * Add/subtract (immediate)
3987 TRANS(ADD_i
, gen_rri
, a
, 1, 1, tcg_gen_add_i64
)
3988 TRANS(SUB_i
, gen_rri
, a
, 1, 1, tcg_gen_sub_i64
)
3989 TRANS(ADDS_i
, gen_rri
, a
, 0, 1, a
->sf
? gen_add64_CC
: gen_add32_CC
)
3990 TRANS(SUBS_i
, gen_rri
, a
, 0, 1, a
->sf
? gen_sub64_CC
: gen_sub32_CC
)
3993 * Add/subtract (immediate, with tags)
3996 static bool gen_add_sub_imm_with_tags(DisasContext
*s
, arg_rri_tag
*a
,
3999 TCGv_i64 tcg_rn
, tcg_rd
;
4002 imm
= a
->uimm6
<< LOG2_TAG_GRANULE
;
4007 tcg_rn
= cpu_reg_sp(s
, a
->rn
);
4008 tcg_rd
= cpu_reg_sp(s
, a
->rd
);
4011 gen_helper_addsubg(tcg_rd
, cpu_env
, tcg_rn
,
4012 tcg_constant_i32(imm
),
4013 tcg_constant_i32(a
->uimm4
));
4015 tcg_gen_addi_i64(tcg_rd
, tcg_rn
, imm
);
4016 gen_address_with_allocation_tag0(tcg_rd
, tcg_rd
);
4021 TRANS_FEAT(ADDG_i
, aa64_mte_insn_reg
, gen_add_sub_imm_with_tags
, a
, false)
4022 TRANS_FEAT(SUBG_i
, aa64_mte_insn_reg
, gen_add_sub_imm_with_tags
, a
, true)
/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    assert(e != 0);
    /* Double the covered width until the whole 64-bit value is filled. */
    while (e < 64) {
        mask |= mask << e;
        e *= 2;
    }
    return mask;
}
4039 * Logical (immediate)
4043 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4044 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4045 * value (ie should cause a guest UNDEF exception), and true if they are
4046 * valid, in which case the decoded bit pattern is written to result.
4048 bool logic_imm_decode_wmask(uint64_t *result
, unsigned int immn
,
4049 unsigned int imms
, unsigned int immr
)
4052 unsigned e
, levels
, s
, r
;
4055 assert(immn
< 2 && imms
< 64 && immr
< 64);
4057 /* The bit patterns we create here are 64 bit patterns which
4058 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4059 * 64 bits each. Each element contains the same value: a run
4060 * of between 1 and e-1 non-zero bits, rotated within the
4061 * element by between 0 and e-1 bits.
4063 * The element size and run length are encoded into immn (1 bit)
4064 * and imms (6 bits) as follows:
4065 * 64 bit elements: immn = 1, imms = <length of run - 1>
4066 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4067 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4068 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4069 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4070 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4071 * Notice that immn = 0, imms = 11111x is the only combination
4072 * not covered by one of the above options; this is reserved.
4073 * Further, <length of run - 1> all-ones is a reserved pattern.
4075 * In all cases the rotation is by immr % e (and immr is 6 bits).
4078 /* First determine the element size */
4079 len
= 31 - clz32((immn
<< 6) | (~imms
& 0x3f));
4081 /* This is the immn == 0, imms == 0x11111x case */
4091 /* <length of run - 1> mustn't be all-ones. */
4095 /* Create the value of one element: s+1 set bits rotated
4096 * by r within the element (which is e bits wide)...
4098 mask
= MAKE_64BIT_MASK(0, s
+ 1);
4100 mask
= (mask
>> r
) | (mask
<< (e
- r
));
4101 mask
&= MAKE_64BIT_MASK(0, e
);
4103 /* ...then replicate the element over the whole 64 bit value */
4104 mask
= bitfield_replicate(mask
, e
);
4109 static bool gen_rri_log(DisasContext
*s
, arg_rri_log
*a
, bool set_cc
,
4110 void (*fn
)(TCGv_i64
, TCGv_i64
, int64_t))
4112 TCGv_i64 tcg_rd
, tcg_rn
;
4115 /* Some immediate field values are reserved. */
4116 if (!logic_imm_decode_wmask(&imm
, extract32(a
->dbm
, 12, 1),
4117 extract32(a
->dbm
, 0, 6),
4118 extract32(a
->dbm
, 6, 6))) {
4122 imm
&= 0xffffffffull
;
4125 tcg_rd
= set_cc
? cpu_reg(s
, a
->rd
) : cpu_reg_sp(s
, a
->rd
);
4126 tcg_rn
= cpu_reg(s
, a
->rn
);
4128 fn(tcg_rd
, tcg_rn
, imm
);
4130 gen_logic_CC(a
->sf
, tcg_rd
);
4133 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4138 TRANS(AND_i
, gen_rri_log
, a
, false, tcg_gen_andi_i64
)
4139 TRANS(ORR_i
, gen_rri_log
, a
, false, tcg_gen_ori_i64
)
4140 TRANS(EOR_i
, gen_rri_log
, a
, false, tcg_gen_xori_i64
)
4141 TRANS(ANDS_i
, gen_rri_log
, a
, true, tcg_gen_andi_i64
)
4144 * Move wide (immediate)
4147 static bool trans_MOVZ(DisasContext
*s
, arg_movw
*a
)
4149 int pos
= a
->hw
<< 4;
4150 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), (uint64_t)a
->imm
<< pos
);
4154 static bool trans_MOVN(DisasContext
*s
, arg_movw
*a
)
4156 int pos
= a
->hw
<< 4;
4157 uint64_t imm
= a
->imm
;
4159 imm
= ~(imm
<< pos
);
4161 imm
= (uint32_t)imm
;
4163 tcg_gen_movi_i64(cpu_reg(s
, a
->rd
), imm
);
4167 static bool trans_MOVK(DisasContext
*s
, arg_movw
*a
)
4169 int pos
= a
->hw
<< 4;
4170 TCGv_i64 tcg_rd
, tcg_im
;
4172 tcg_rd
= cpu_reg(s
, a
->rd
);
4173 tcg_im
= tcg_constant_i64(a
->imm
);
4174 tcg_gen_deposit_i64(tcg_rd
, tcg_rd
, tcg_im
, pos
, 16);
4176 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4185 static bool trans_SBFM(DisasContext
*s
, arg_SBFM
*a
)
4187 TCGv_i64 tcg_rd
= cpu_reg(s
, a
->rd
);
4188 TCGv_i64 tcg_tmp
= read_cpu_reg(s
, a
->rn
, 1);
4189 unsigned int bitsize
= a
->sf
? 64 : 32;
4190 unsigned int ri
= a
->immr
;
4191 unsigned int si
= a
->imms
;
4192 unsigned int pos
, len
;
4195 /* Wd<s-r:0> = Wn<s:r> */
4196 len
= (si
- ri
) + 1;
4197 tcg_gen_sextract_i64(tcg_rd
, tcg_tmp
, ri
, len
);
4199 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4202 /* Wd<32+s-r,32-r> = Wn<s:0> */
4204 pos
= (bitsize
- ri
) & (bitsize
- 1);
4208 * Sign extend the destination field from len to fill the
4209 * balance of the word. Let the deposit below insert all
4210 * of those sign bits.
4212 tcg_gen_sextract_i64(tcg_tmp
, tcg_tmp
, 0, len
);
4217 * We start with zero, and we haven't modified any bits outside
4218 * bitsize, therefore no final zero-extension is unneeded for !sf.
4220 tcg_gen_deposit_z_i64(tcg_rd
, tcg_tmp
, pos
, len
);
4225 static bool trans_UBFM(DisasContext
*s
, arg_UBFM
*a
)
4227 TCGv_i64 tcg_rd
= cpu_reg(s
, a
->rd
);
4228 TCGv_i64 tcg_tmp
= read_cpu_reg(s
, a
->rn
, 1);
4229 unsigned int bitsize
= a
->sf
? 64 : 32;
4230 unsigned int ri
= a
->immr
;
4231 unsigned int si
= a
->imms
;
4232 unsigned int pos
, len
;
4234 tcg_rd
= cpu_reg(s
, a
->rd
);
4235 tcg_tmp
= read_cpu_reg(s
, a
->rn
, 1);
4238 /* Wd<s-r:0> = Wn<s:r> */
4239 len
= (si
- ri
) + 1;
4240 tcg_gen_extract_i64(tcg_rd
, tcg_tmp
, ri
, len
);
4242 /* Wd<32+s-r,32-r> = Wn<s:0> */
4244 pos
= (bitsize
- ri
) & (bitsize
- 1);
4245 tcg_gen_deposit_z_i64(tcg_rd
, tcg_tmp
, pos
, len
);
4250 static bool trans_BFM(DisasContext
*s
, arg_BFM
*a
)
4252 TCGv_i64 tcg_rd
= cpu_reg(s
, a
->rd
);
4253 TCGv_i64 tcg_tmp
= read_cpu_reg(s
, a
->rn
, 1);
4254 unsigned int bitsize
= a
->sf
? 64 : 32;
4255 unsigned int ri
= a
->immr
;
4256 unsigned int si
= a
->imms
;
4257 unsigned int pos
, len
;
4259 tcg_rd
= cpu_reg(s
, a
->rd
);
4260 tcg_tmp
= read_cpu_reg(s
, a
->rn
, 1);
4263 /* Wd<s-r:0> = Wn<s:r> */
4264 tcg_gen_shri_i64(tcg_tmp
, tcg_tmp
, ri
);
4265 len
= (si
- ri
) + 1;
4268 /* Wd<32+s-r,32-r> = Wn<s:0> */
4270 pos
= (bitsize
- ri
) & (bitsize
- 1);
4273 tcg_gen_deposit_i64(tcg_rd
, tcg_rd
, tcg_tmp
, pos
, len
);
4275 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4280 static bool trans_EXTR(DisasContext
*s
, arg_extract
*a
)
4282 TCGv_i64 tcg_rd
, tcg_rm
, tcg_rn
;
4284 tcg_rd
= cpu_reg(s
, a
->rd
);
4286 if (unlikely(a
->imm
== 0)) {
4288 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4289 * so an extract from bit 0 is a special case.
4292 tcg_gen_mov_i64(tcg_rd
, cpu_reg(s
, a
->rm
));
4294 tcg_gen_ext32u_i64(tcg_rd
, cpu_reg(s
, a
->rm
));
4297 tcg_rm
= cpu_reg(s
, a
->rm
);
4298 tcg_rn
= cpu_reg(s
, a
->rn
);
4301 /* Specialization to ROR happens in EXTRACT2. */
4302 tcg_gen_extract2_i64(tcg_rd
, tcg_rm
, tcg_rn
, a
->imm
);
4304 TCGv_i32 t0
= tcg_temp_new_i32();
4306 tcg_gen_extrl_i64_i32(t0
, tcg_rm
);
4307 if (a
->rm
== a
->rn
) {
4308 tcg_gen_rotri_i32(t0
, t0
, a
->imm
);
4310 TCGv_i32 t1
= tcg_temp_new_i32();
4311 tcg_gen_extrl_i64_i32(t1
, tcg_rn
);
4312 tcg_gen_extract2_i32(t0
, t0
, t1
, a
->imm
);
4314 tcg_gen_extu_i32_i64(tcg_rd
, t0
);
4320 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4321 * Note that it is the caller's responsibility to ensure that the
4322 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4323 * mandated semantics for out of range shifts.
4325 static void shift_reg(TCGv_i64 dst
, TCGv_i64 src
, int sf
,
4326 enum a64_shift_type shift_type
, TCGv_i64 shift_amount
)
4328 switch (shift_type
) {
4329 case A64_SHIFT_TYPE_LSL
:
4330 tcg_gen_shl_i64(dst
, src
, shift_amount
);
4332 case A64_SHIFT_TYPE_LSR
:
4333 tcg_gen_shr_i64(dst
, src
, shift_amount
);
4335 case A64_SHIFT_TYPE_ASR
:
4337 tcg_gen_ext32s_i64(dst
, src
);
4339 tcg_gen_sar_i64(dst
, sf
? src
: dst
, shift_amount
);
4341 case A64_SHIFT_TYPE_ROR
:
4343 tcg_gen_rotr_i64(dst
, src
, shift_amount
);
4346 t0
= tcg_temp_new_i32();
4347 t1
= tcg_temp_new_i32();
4348 tcg_gen_extrl_i64_i32(t0
, src
);
4349 tcg_gen_extrl_i64_i32(t1
, shift_amount
);
4350 tcg_gen_rotr_i32(t0
, t0
, t1
);
4351 tcg_gen_extu_i32_i64(dst
, t0
);
4355 assert(FALSE
); /* all shift types should be handled */
4359 if (!sf
) { /* zero extend final result */
4360 tcg_gen_ext32u_i64(dst
, dst
);
4364 /* Shift a TCGv src by immediate, put result in dst.
4365 * The shift amount must be in range (this should always be true as the
4366 * relevant instructions will UNDEF on bad shift immediates).
4368 static void shift_reg_imm(TCGv_i64 dst
, TCGv_i64 src
, int sf
,
4369 enum a64_shift_type shift_type
, unsigned int shift_i
)
4371 assert(shift_i
< (sf
? 64 : 32));
4374 tcg_gen_mov_i64(dst
, src
);
4376 shift_reg(dst
, src
, sf
, shift_type
, tcg_constant_i64(shift_i
));
4380 /* Logical (shifted register)
4381 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4382 * +----+-----+-----------+-------+---+------+--------+------+------+
4383 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
4384 * +----+-----+-----------+-------+---+------+--------+------+------+
4386 static void disas_logic_reg(DisasContext
*s
, uint32_t insn
)
4388 TCGv_i64 tcg_rd
, tcg_rn
, tcg_rm
;
4389 unsigned int sf
, opc
, shift_type
, invert
, rm
, shift_amount
, rn
, rd
;
4391 sf
= extract32(insn
, 31, 1);
4392 opc
= extract32(insn
, 29, 2);
4393 shift_type
= extract32(insn
, 22, 2);
4394 invert
= extract32(insn
, 21, 1);
4395 rm
= extract32(insn
, 16, 5);
4396 shift_amount
= extract32(insn
, 10, 6);
4397 rn
= extract32(insn
, 5, 5);
4398 rd
= extract32(insn
, 0, 5);
4400 if (!sf
&& (shift_amount
& (1 << 5))) {
4401 unallocated_encoding(s
);
4405 tcg_rd
= cpu_reg(s
, rd
);
4407 if (opc
== 1 && shift_amount
== 0 && shift_type
== 0 && rn
== 31) {
4408 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4409 * register-register MOV and MVN, so it is worth special casing.
4411 tcg_rm
= cpu_reg(s
, rm
);
4413 tcg_gen_not_i64(tcg_rd
, tcg_rm
);
4415 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4419 tcg_gen_mov_i64(tcg_rd
, tcg_rm
);
4421 tcg_gen_ext32u_i64(tcg_rd
, tcg_rm
);
4427 tcg_rm
= read_cpu_reg(s
, rm
, sf
);
4430 shift_reg_imm(tcg_rm
, tcg_rm
, sf
, shift_type
, shift_amount
);
4433 tcg_rn
= cpu_reg(s
, rn
);
4435 switch (opc
| (invert
<< 2)) {
4438 tcg_gen_and_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4441 tcg_gen_or_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4444 tcg_gen_xor_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4448 tcg_gen_andc_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4451 tcg_gen_orc_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4454 tcg_gen_eqv_i64(tcg_rd
, tcg_rn
, tcg_rm
);
4462 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4466 gen_logic_CC(sf
, tcg_rd
);
4471 * Add/subtract (extended register)
4473 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
4474 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4475 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
4476 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4478 * sf: 0 -> 32bit, 1 -> 64bit
4479 * op: 0 -> add , 1 -> sub
4482 * option: extension type (see DecodeRegExtend)
4483 * imm3: optional shift to Rm
4485 * Rd = Rn + LSL(extend(Rm), amount)
4487 static void disas_add_sub_ext_reg(DisasContext
*s
, uint32_t insn
)
4489 int rd
= extract32(insn
, 0, 5);
4490 int rn
= extract32(insn
, 5, 5);
4491 int imm3
= extract32(insn
, 10, 3);
4492 int option
= extract32(insn
, 13, 3);
4493 int rm
= extract32(insn
, 16, 5);
4494 int opt
= extract32(insn
, 22, 2);
4495 bool setflags
= extract32(insn
, 29, 1);
4496 bool sub_op
= extract32(insn
, 30, 1);
4497 bool sf
= extract32(insn
, 31, 1);
4499 TCGv_i64 tcg_rm
, tcg_rn
; /* temps */
4501 TCGv_i64 tcg_result
;
4503 if (imm3
> 4 || opt
!= 0) {
4504 unallocated_encoding(s
);
4508 /* non-flag setting ops may use SP */
4510 tcg_rd
= cpu_reg_sp(s
, rd
);
4512 tcg_rd
= cpu_reg(s
, rd
);
4514 tcg_rn
= read_cpu_reg_sp(s
, rn
, sf
);
4516 tcg_rm
= read_cpu_reg(s
, rm
, sf
);
4517 ext_and_shift_reg(tcg_rm
, tcg_rm
, option
, imm3
);
4519 tcg_result
= tcg_temp_new_i64();
4523 tcg_gen_sub_i64(tcg_result
, tcg_rn
, tcg_rm
);
4525 tcg_gen_add_i64(tcg_result
, tcg_rn
, tcg_rm
);
4529 gen_sub_CC(sf
, tcg_result
, tcg_rn
, tcg_rm
);
4531 gen_add_CC(sf
, tcg_result
, tcg_rn
, tcg_rm
);
4536 tcg_gen_mov_i64(tcg_rd
, tcg_result
);
4538 tcg_gen_ext32u_i64(tcg_rd
, tcg_result
);
4543 * Add/subtract (shifted register)
4545 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
4546 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4547 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
4548 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4550 * sf: 0 -> 32bit, 1 -> 64bit
4551 * op: 0 -> add , 1 -> sub
4553 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4554 * imm6: Shift amount to apply to Rm before the add/sub
4556 static void disas_add_sub_reg(DisasContext
*s
, uint32_t insn
)
4558 int rd
= extract32(insn
, 0, 5);
4559 int rn
= extract32(insn
, 5, 5);
4560 int imm6
= extract32(insn
, 10, 6);
4561 int rm
= extract32(insn
, 16, 5);
4562 int shift_type
= extract32(insn
, 22, 2);
4563 bool setflags
= extract32(insn
, 29, 1);
4564 bool sub_op
= extract32(insn
, 30, 1);
4565 bool sf
= extract32(insn
, 31, 1);
4567 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
4568 TCGv_i64 tcg_rn
, tcg_rm
;
4569 TCGv_i64 tcg_result
;
4571 if ((shift_type
== 3) || (!sf
&& (imm6
> 31))) {
4572 unallocated_encoding(s
);
4576 tcg_rn
= read_cpu_reg(s
, rn
, sf
);
4577 tcg_rm
= read_cpu_reg(s
, rm
, sf
);
4579 shift_reg_imm(tcg_rm
, tcg_rm
, sf
, shift_type
, imm6
);
4581 tcg_result
= tcg_temp_new_i64();
4585 tcg_gen_sub_i64(tcg_result
, tcg_rn
, tcg_rm
);
4587 tcg_gen_add_i64(tcg_result
, tcg_rn
, tcg_rm
);
4591 gen_sub_CC(sf
, tcg_result
, tcg_rn
, tcg_rm
);
4593 gen_add_CC(sf
, tcg_result
, tcg_rn
, tcg_rm
);
4598 tcg_gen_mov_i64(tcg_rd
, tcg_result
);
4600 tcg_gen_ext32u_i64(tcg_rd
, tcg_result
);
4604 /* Data-processing (3 source)
4606 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
4607 * +--+------+-----------+------+------+----+------+------+------+
4608 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
4609 * +--+------+-----------+------+------+----+------+------+------+
4611 static void disas_data_proc_3src(DisasContext
*s
, uint32_t insn
)
4613 int rd
= extract32(insn
, 0, 5);
4614 int rn
= extract32(insn
, 5, 5);
4615 int ra
= extract32(insn
, 10, 5);
4616 int rm
= extract32(insn
, 16, 5);
4617 int op_id
= (extract32(insn
, 29, 3) << 4) |
4618 (extract32(insn
, 21, 3) << 1) |
4619 extract32(insn
, 15, 1);
4620 bool sf
= extract32(insn
, 31, 1);
4621 bool is_sub
= extract32(op_id
, 0, 1);
4622 bool is_high
= extract32(op_id
, 2, 1);
4623 bool is_signed
= false;
4628 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4630 case 0x42: /* SMADDL */
4631 case 0x43: /* SMSUBL */
4632 case 0x44: /* SMULH */
4635 case 0x0: /* MADD (32bit) */
4636 case 0x1: /* MSUB (32bit) */
4637 case 0x40: /* MADD (64bit) */
4638 case 0x41: /* MSUB (64bit) */
4639 case 0x4a: /* UMADDL */
4640 case 0x4b: /* UMSUBL */
4641 case 0x4c: /* UMULH */
4644 unallocated_encoding(s
);
4649 TCGv_i64 low_bits
= tcg_temp_new_i64(); /* low bits discarded */
4650 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
4651 TCGv_i64 tcg_rn
= cpu_reg(s
, rn
);
4652 TCGv_i64 tcg_rm
= cpu_reg(s
, rm
);
4655 tcg_gen_muls2_i64(low_bits
, tcg_rd
, tcg_rn
, tcg_rm
);
4657 tcg_gen_mulu2_i64(low_bits
, tcg_rd
, tcg_rn
, tcg_rm
);
4662 tcg_op1
= tcg_temp_new_i64();
4663 tcg_op2
= tcg_temp_new_i64();
4664 tcg_tmp
= tcg_temp_new_i64();
4667 tcg_gen_mov_i64(tcg_op1
, cpu_reg(s
, rn
));
4668 tcg_gen_mov_i64(tcg_op2
, cpu_reg(s
, rm
));
4671 tcg_gen_ext32s_i64(tcg_op1
, cpu_reg(s
, rn
));
4672 tcg_gen_ext32s_i64(tcg_op2
, cpu_reg(s
, rm
));
4674 tcg_gen_ext32u_i64(tcg_op1
, cpu_reg(s
, rn
));
4675 tcg_gen_ext32u_i64(tcg_op2
, cpu_reg(s
, rm
));
4679 if (ra
== 31 && !is_sub
) {
4680 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4681 tcg_gen_mul_i64(cpu_reg(s
, rd
), tcg_op1
, tcg_op2
);
4683 tcg_gen_mul_i64(tcg_tmp
, tcg_op1
, tcg_op2
);
4685 tcg_gen_sub_i64(cpu_reg(s
, rd
), cpu_reg(s
, ra
), tcg_tmp
);
4687 tcg_gen_add_i64(cpu_reg(s
, rd
), cpu_reg(s
, ra
), tcg_tmp
);
4692 tcg_gen_ext32u_i64(cpu_reg(s
, rd
), cpu_reg(s
, rd
));
4696 /* Add/subtract (with carry)
4697 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
4698 * +--+--+--+------------------------+------+-------------+------+-----+
4699 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
4700 * +--+--+--+------------------------+------+-------------+------+-----+
4703 static void disas_adc_sbc(DisasContext
*s
, uint32_t insn
)
4705 unsigned int sf
, op
, setflags
, rm
, rn
, rd
;
4706 TCGv_i64 tcg_y
, tcg_rn
, tcg_rd
;
4708 sf
= extract32(insn
, 31, 1);
4709 op
= extract32(insn
, 30, 1);
4710 setflags
= extract32(insn
, 29, 1);
4711 rm
= extract32(insn
, 16, 5);
4712 rn
= extract32(insn
, 5, 5);
4713 rd
= extract32(insn
, 0, 5);
4715 tcg_rd
= cpu_reg(s
, rd
);
4716 tcg_rn
= cpu_reg(s
, rn
);
4719 tcg_y
= tcg_temp_new_i64();
4720 tcg_gen_not_i64(tcg_y
, cpu_reg(s
, rm
));
4722 tcg_y
= cpu_reg(s
, rm
);
4726 gen_adc_CC(sf
, tcg_rd
, tcg_rn
, tcg_y
);
4728 gen_adc(sf
, tcg_rd
, tcg_rn
, tcg_y
);
4733 * Rotate right into flags
4734 * 31 30 29 21 15 10 5 4 0
4735 * +--+--+--+-----------------+--------+-----------+------+--+------+
4736 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
4737 * +--+--+--+-----------------+--------+-----------+------+--+------+
4739 static void disas_rotate_right_into_flags(DisasContext
*s
, uint32_t insn
)
4741 int mask
= extract32(insn
, 0, 4);
4742 int o2
= extract32(insn
, 4, 1);
4743 int rn
= extract32(insn
, 5, 5);
4744 int imm6
= extract32(insn
, 15, 6);
4745 int sf_op_s
= extract32(insn
, 29, 3);
4749 if (sf_op_s
!= 5 || o2
!= 0 || !dc_isar_feature(aa64_condm_4
, s
)) {
4750 unallocated_encoding(s
);
4754 tcg_rn
= read_cpu_reg(s
, rn
, 1);
4755 tcg_gen_rotri_i64(tcg_rn
, tcg_rn
, imm6
);
4757 nzcv
= tcg_temp_new_i32();
4758 tcg_gen_extrl_i64_i32(nzcv
, tcg_rn
);
4760 if (mask
& 8) { /* N */
4761 tcg_gen_shli_i32(cpu_NF
, nzcv
, 31 - 3);
4763 if (mask
& 4) { /* Z */
4764 tcg_gen_not_i32(cpu_ZF
, nzcv
);
4765 tcg_gen_andi_i32(cpu_ZF
, cpu_ZF
, 4);
4767 if (mask
& 2) { /* C */
4768 tcg_gen_extract_i32(cpu_CF
, nzcv
, 1, 1);
4770 if (mask
& 1) { /* V */
4771 tcg_gen_shli_i32(cpu_VF
, nzcv
, 31 - 0);
4776 * Evaluate into flags
4777 * 31 30 29 21 15 14 10 5 4 0
4778 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4779 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
4780 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4782 static void disas_evaluate_into_flags(DisasContext
*s
, uint32_t insn
)
4784 int o3_mask
= extract32(insn
, 0, 5);
4785 int rn
= extract32(insn
, 5, 5);
4786 int o2
= extract32(insn
, 15, 6);
4787 int sz
= extract32(insn
, 14, 1);
4788 int sf_op_s
= extract32(insn
, 29, 3);
4792 if (sf_op_s
!= 1 || o2
!= 0 || o3_mask
!= 0xd ||
4793 !dc_isar_feature(aa64_condm_4
, s
)) {
4794 unallocated_encoding(s
);
4797 shift
= sz
? 16 : 24; /* SETF16 or SETF8 */
4799 tmp
= tcg_temp_new_i32();
4800 tcg_gen_extrl_i64_i32(tmp
, cpu_reg(s
, rn
));
4801 tcg_gen_shli_i32(cpu_NF
, tmp
, shift
);
4802 tcg_gen_shli_i32(cpu_VF
, tmp
, shift
- 1);
4803 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
4804 tcg_gen_xor_i32(cpu_VF
, cpu_VF
, cpu_NF
);
4807 /* Conditional compare (immediate / register)
4808 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4809 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4810 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
4811 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4814 static void disas_cc(DisasContext
*s
, uint32_t insn
)
4816 unsigned int sf
, op
, y
, cond
, rn
, nzcv
, is_imm
;
4817 TCGv_i32 tcg_t0
, tcg_t1
, tcg_t2
;
4818 TCGv_i64 tcg_tmp
, tcg_y
, tcg_rn
;
4821 if (!extract32(insn
, 29, 1)) {
4822 unallocated_encoding(s
);
4825 if (insn
& (1 << 10 | 1 << 4)) {
4826 unallocated_encoding(s
);
4829 sf
= extract32(insn
, 31, 1);
4830 op
= extract32(insn
, 30, 1);
4831 is_imm
= extract32(insn
, 11, 1);
4832 y
= extract32(insn
, 16, 5); /* y = rm (reg) or imm5 (imm) */
4833 cond
= extract32(insn
, 12, 4);
4834 rn
= extract32(insn
, 5, 5);
4835 nzcv
= extract32(insn
, 0, 4);
4837 /* Set T0 = !COND. */
4838 tcg_t0
= tcg_temp_new_i32();
4839 arm_test_cc(&c
, cond
);
4840 tcg_gen_setcondi_i32(tcg_invert_cond(c
.cond
), tcg_t0
, c
.value
, 0);
4842 /* Load the arguments for the new comparison. */
4844 tcg_y
= tcg_temp_new_i64();
4845 tcg_gen_movi_i64(tcg_y
, y
);
4847 tcg_y
= cpu_reg(s
, y
);
4849 tcg_rn
= cpu_reg(s
, rn
);
4851 /* Set the flags for the new comparison. */
4852 tcg_tmp
= tcg_temp_new_i64();
4854 gen_sub_CC(sf
, tcg_tmp
, tcg_rn
, tcg_y
);
4856 gen_add_CC(sf
, tcg_tmp
, tcg_rn
, tcg_y
);
4859 /* If COND was false, force the flags to #nzcv. Compute two masks
4860 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4861 * For tcg hosts that support ANDC, we can make do with just T1.
4862 * In either case, allow the tcg optimizer to delete any unused mask.
4864 tcg_t1
= tcg_temp_new_i32();
4865 tcg_t2
= tcg_temp_new_i32();
4866 tcg_gen_neg_i32(tcg_t1
, tcg_t0
);
4867 tcg_gen_subi_i32(tcg_t2
, tcg_t0
, 1);
4869 if (nzcv
& 8) { /* N */
4870 tcg_gen_or_i32(cpu_NF
, cpu_NF
, tcg_t1
);
4872 if (TCG_TARGET_HAS_andc_i32
) {
4873 tcg_gen_andc_i32(cpu_NF
, cpu_NF
, tcg_t1
);
4875 tcg_gen_and_i32(cpu_NF
, cpu_NF
, tcg_t2
);
4878 if (nzcv
& 4) { /* Z */
4879 if (TCG_TARGET_HAS_andc_i32
) {
4880 tcg_gen_andc_i32(cpu_ZF
, cpu_ZF
, tcg_t1
);
4882 tcg_gen_and_i32(cpu_ZF
, cpu_ZF
, tcg_t2
);
4885 tcg_gen_or_i32(cpu_ZF
, cpu_ZF
, tcg_t0
);
4887 if (nzcv
& 2) { /* C */
4888 tcg_gen_or_i32(cpu_CF
, cpu_CF
, tcg_t0
);
4890 if (TCG_TARGET_HAS_andc_i32
) {
4891 tcg_gen_andc_i32(cpu_CF
, cpu_CF
, tcg_t1
);
4893 tcg_gen_and_i32(cpu_CF
, cpu_CF
, tcg_t2
);
4896 if (nzcv
& 1) { /* V */
4897 tcg_gen_or_i32(cpu_VF
, cpu_VF
, tcg_t1
);
4899 if (TCG_TARGET_HAS_andc_i32
) {
4900 tcg_gen_andc_i32(cpu_VF
, cpu_VF
, tcg_t1
);
4902 tcg_gen_and_i32(cpu_VF
, cpu_VF
, tcg_t2
);
4907 /* Conditional select
4908 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
4909 * +----+----+---+-----------------+------+------+-----+------+------+
4910 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
4911 * +----+----+---+-----------------+------+------+-----+------+------+
4913 static void disas_cond_select(DisasContext
*s
, uint32_t insn
)
4915 unsigned int sf
, else_inv
, rm
, cond
, else_inc
, rn
, rd
;
4916 TCGv_i64 tcg_rd
, zero
;
4919 if (extract32(insn
, 29, 1) || extract32(insn
, 11, 1)) {
4920 /* S == 1 or op2<1> == 1 */
4921 unallocated_encoding(s
);
4924 sf
= extract32(insn
, 31, 1);
4925 else_inv
= extract32(insn
, 30, 1);
4926 rm
= extract32(insn
, 16, 5);
4927 cond
= extract32(insn
, 12, 4);
4928 else_inc
= extract32(insn
, 10, 1);
4929 rn
= extract32(insn
, 5, 5);
4930 rd
= extract32(insn
, 0, 5);
4932 tcg_rd
= cpu_reg(s
, rd
);
4934 a64_test_cc(&c
, cond
);
4935 zero
= tcg_constant_i64(0);
4937 if (rn
== 31 && rm
== 31 && (else_inc
^ else_inv
)) {
4940 tcg_gen_negsetcond_i64(tcg_invert_cond(c
.cond
),
4941 tcg_rd
, c
.value
, zero
);
4943 tcg_gen_setcond_i64(tcg_invert_cond(c
.cond
),
4944 tcg_rd
, c
.value
, zero
);
4947 TCGv_i64 t_true
= cpu_reg(s
, rn
);
4948 TCGv_i64 t_false
= read_cpu_reg(s
, rm
, 1);
4949 if (else_inv
&& else_inc
) {
4950 tcg_gen_neg_i64(t_false
, t_false
);
4951 } else if (else_inv
) {
4952 tcg_gen_not_i64(t_false
, t_false
);
4953 } else if (else_inc
) {
4954 tcg_gen_addi_i64(t_false
, t_false
, 1);
4956 tcg_gen_movcond_i64(c
.cond
, tcg_rd
, c
.value
, zero
, t_true
, t_false
);
4960 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
4964 static void handle_clz(DisasContext
*s
, unsigned int sf
,
4965 unsigned int rn
, unsigned int rd
)
4967 TCGv_i64 tcg_rd
, tcg_rn
;
4968 tcg_rd
= cpu_reg(s
, rd
);
4969 tcg_rn
= cpu_reg(s
, rn
);
4972 tcg_gen_clzi_i64(tcg_rd
, tcg_rn
, 64);
4974 TCGv_i32 tcg_tmp32
= tcg_temp_new_i32();
4975 tcg_gen_extrl_i64_i32(tcg_tmp32
, tcg_rn
);
4976 tcg_gen_clzi_i32(tcg_tmp32
, tcg_tmp32
, 32);
4977 tcg_gen_extu_i32_i64(tcg_rd
, tcg_tmp32
);
4981 static void handle_cls(DisasContext
*s
, unsigned int sf
,
4982 unsigned int rn
, unsigned int rd
)
4984 TCGv_i64 tcg_rd
, tcg_rn
;
4985 tcg_rd
= cpu_reg(s
, rd
);
4986 tcg_rn
= cpu_reg(s
, rn
);
4989 tcg_gen_clrsb_i64(tcg_rd
, tcg_rn
);
4991 TCGv_i32 tcg_tmp32
= tcg_temp_new_i32();
4992 tcg_gen_extrl_i64_i32(tcg_tmp32
, tcg_rn
);
4993 tcg_gen_clrsb_i32(tcg_tmp32
, tcg_tmp32
);
4994 tcg_gen_extu_i32_i64(tcg_rd
, tcg_tmp32
);
4998 static void handle_rbit(DisasContext
*s
, unsigned int sf
,
4999 unsigned int rn
, unsigned int rd
)
5001 TCGv_i64 tcg_rd
, tcg_rn
;
5002 tcg_rd
= cpu_reg(s
, rd
);
5003 tcg_rn
= cpu_reg(s
, rn
);
5006 gen_helper_rbit64(tcg_rd
, tcg_rn
);
5008 TCGv_i32 tcg_tmp32
= tcg_temp_new_i32();
5009 tcg_gen_extrl_i64_i32(tcg_tmp32
, tcg_rn
);
5010 gen_helper_rbit(tcg_tmp32
, tcg_tmp32
);
5011 tcg_gen_extu_i32_i64(tcg_rd
, tcg_tmp32
);
5015 /* REV with sf==1, opcode==3 ("REV64") */
5016 static void handle_rev64(DisasContext
*s
, unsigned int sf
,
5017 unsigned int rn
, unsigned int rd
)
5020 unallocated_encoding(s
);
5023 tcg_gen_bswap64_i64(cpu_reg(s
, rd
), cpu_reg(s
, rn
));
5026 /* REV with sf==0, opcode==2
5027 * REV32 (sf==1, opcode==2)
5029 static void handle_rev32(DisasContext
*s
, unsigned int sf
,
5030 unsigned int rn
, unsigned int rd
)
5032 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
5033 TCGv_i64 tcg_rn
= cpu_reg(s
, rn
);
5036 tcg_gen_bswap64_i64(tcg_rd
, tcg_rn
);
5037 tcg_gen_rotri_i64(tcg_rd
, tcg_rd
, 32);
5039 tcg_gen_bswap32_i64(tcg_rd
, tcg_rn
, TCG_BSWAP_OZ
);
5043 /* REV16 (opcode==1) */
5044 static void handle_rev16(DisasContext
*s
, unsigned int sf
,
5045 unsigned int rn
, unsigned int rd
)
5047 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
5048 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
5049 TCGv_i64 tcg_rn
= read_cpu_reg(s
, rn
, sf
);
5050 TCGv_i64 mask
= tcg_constant_i64(sf
? 0x00ff00ff00ff00ffull
: 0x00ff00ff);
5052 tcg_gen_shri_i64(tcg_tmp
, tcg_rn
, 8);
5053 tcg_gen_and_i64(tcg_rd
, tcg_rn
, mask
);
5054 tcg_gen_and_i64(tcg_tmp
, tcg_tmp
, mask
);
5055 tcg_gen_shli_i64(tcg_rd
, tcg_rd
, 8);
5056 tcg_gen_or_i64(tcg_rd
, tcg_rd
, tcg_tmp
);
5059 /* Data-processing (1 source)
5060 * 31 30 29 28 21 20 16 15 10 9 5 4 0
5061 * +----+---+---+-----------------+---------+--------+------+------+
5062 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
5063 * +----+---+---+-----------------+---------+--------+------+------+
5065 static void disas_data_proc_1src(DisasContext
*s
, uint32_t insn
)
5067 unsigned int sf
, opcode
, opcode2
, rn
, rd
;
5070 if (extract32(insn
, 29, 1)) {
5071 unallocated_encoding(s
);
5075 sf
= extract32(insn
, 31, 1);
5076 opcode
= extract32(insn
, 10, 6);
5077 opcode2
= extract32(insn
, 16, 5);
5078 rn
= extract32(insn
, 5, 5);
5079 rd
= extract32(insn
, 0, 5);
5081 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5083 switch (MAP(sf
, opcode2
, opcode
)) {
5084 case MAP(0, 0x00, 0x00): /* RBIT */
5085 case MAP(1, 0x00, 0x00):
5086 handle_rbit(s
, sf
, rn
, rd
);
5088 case MAP(0, 0x00, 0x01): /* REV16 */
5089 case MAP(1, 0x00, 0x01):
5090 handle_rev16(s
, sf
, rn
, rd
);
5092 case MAP(0, 0x00, 0x02): /* REV/REV32 */
5093 case MAP(1, 0x00, 0x02):
5094 handle_rev32(s
, sf
, rn
, rd
);
5096 case MAP(1, 0x00, 0x03): /* REV64 */
5097 handle_rev64(s
, sf
, rn
, rd
);
5099 case MAP(0, 0x00, 0x04): /* CLZ */
5100 case MAP(1, 0x00, 0x04):
5101 handle_clz(s
, sf
, rn
, rd
);
5103 case MAP(0, 0x00, 0x05): /* CLS */
5104 case MAP(1, 0x00, 0x05):
5105 handle_cls(s
, sf
, rn
, rd
);
5107 case MAP(1, 0x01, 0x00): /* PACIA */
5108 if (s
->pauth_active
) {
5109 tcg_rd
= cpu_reg(s
, rd
);
5110 gen_helper_pacia(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5111 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5112 goto do_unallocated
;
5115 case MAP(1, 0x01, 0x01): /* PACIB */
5116 if (s
->pauth_active
) {
5117 tcg_rd
= cpu_reg(s
, rd
);
5118 gen_helper_pacib(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5119 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5120 goto do_unallocated
;
5123 case MAP(1, 0x01, 0x02): /* PACDA */
5124 if (s
->pauth_active
) {
5125 tcg_rd
= cpu_reg(s
, rd
);
5126 gen_helper_pacda(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5127 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5128 goto do_unallocated
;
5131 case MAP(1, 0x01, 0x03): /* PACDB */
5132 if (s
->pauth_active
) {
5133 tcg_rd
= cpu_reg(s
, rd
);
5134 gen_helper_pacdb(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5135 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5136 goto do_unallocated
;
5139 case MAP(1, 0x01, 0x04): /* AUTIA */
5140 if (s
->pauth_active
) {
5141 tcg_rd
= cpu_reg(s
, rd
);
5142 gen_helper_autia(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5143 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5144 goto do_unallocated
;
5147 case MAP(1, 0x01, 0x05): /* AUTIB */
5148 if (s
->pauth_active
) {
5149 tcg_rd
= cpu_reg(s
, rd
);
5150 gen_helper_autib(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5151 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5152 goto do_unallocated
;
5155 case MAP(1, 0x01, 0x06): /* AUTDA */
5156 if (s
->pauth_active
) {
5157 tcg_rd
= cpu_reg(s
, rd
);
5158 gen_helper_autda(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5159 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5160 goto do_unallocated
;
5163 case MAP(1, 0x01, 0x07): /* AUTDB */
5164 if (s
->pauth_active
) {
5165 tcg_rd
= cpu_reg(s
, rd
);
5166 gen_helper_autdb(tcg_rd
, cpu_env
, tcg_rd
, cpu_reg_sp(s
, rn
));
5167 } else if (!dc_isar_feature(aa64_pauth
, s
)) {
5168 goto do_unallocated
;
5171 case MAP(1, 0x01, 0x08): /* PACIZA */
5172 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5173 goto do_unallocated
;
5174 } else if (s
->pauth_active
) {
5175 tcg_rd
= cpu_reg(s
, rd
);
5176 gen_helper_pacia(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5179 case MAP(1, 0x01, 0x09): /* PACIZB */
5180 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5181 goto do_unallocated
;
5182 } else if (s
->pauth_active
) {
5183 tcg_rd
= cpu_reg(s
, rd
);
5184 gen_helper_pacib(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5187 case MAP(1, 0x01, 0x0a): /* PACDZA */
5188 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5189 goto do_unallocated
;
5190 } else if (s
->pauth_active
) {
5191 tcg_rd
= cpu_reg(s
, rd
);
5192 gen_helper_pacda(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5195 case MAP(1, 0x01, 0x0b): /* PACDZB */
5196 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5197 goto do_unallocated
;
5198 } else if (s
->pauth_active
) {
5199 tcg_rd
= cpu_reg(s
, rd
);
5200 gen_helper_pacdb(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5203 case MAP(1, 0x01, 0x0c): /* AUTIZA */
5204 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5205 goto do_unallocated
;
5206 } else if (s
->pauth_active
) {
5207 tcg_rd
= cpu_reg(s
, rd
);
5208 gen_helper_autia(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5211 case MAP(1, 0x01, 0x0d): /* AUTIZB */
5212 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5213 goto do_unallocated
;
5214 } else if (s
->pauth_active
) {
5215 tcg_rd
= cpu_reg(s
, rd
);
5216 gen_helper_autib(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5219 case MAP(1, 0x01, 0x0e): /* AUTDZA */
5220 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5221 goto do_unallocated
;
5222 } else if (s
->pauth_active
) {
5223 tcg_rd
= cpu_reg(s
, rd
);
5224 gen_helper_autda(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5227 case MAP(1, 0x01, 0x0f): /* AUTDZB */
5228 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5229 goto do_unallocated
;
5230 } else if (s
->pauth_active
) {
5231 tcg_rd
= cpu_reg(s
, rd
);
5232 gen_helper_autdb(tcg_rd
, cpu_env
, tcg_rd
, tcg_constant_i64(0));
5235 case MAP(1, 0x01, 0x10): /* XPACI */
5236 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5237 goto do_unallocated
;
5238 } else if (s
->pauth_active
) {
5239 tcg_rd
= cpu_reg(s
, rd
);
5240 gen_helper_xpaci(tcg_rd
, cpu_env
, tcg_rd
);
5243 case MAP(1, 0x01, 0x11): /* XPACD */
5244 if (!dc_isar_feature(aa64_pauth
, s
) || rn
!= 31) {
5245 goto do_unallocated
;
5246 } else if (s
->pauth_active
) {
5247 tcg_rd
= cpu_reg(s
, rd
);
5248 gen_helper_xpacd(tcg_rd
, cpu_env
, tcg_rd
);
5253 unallocated_encoding(s
);
5260 static void handle_div(DisasContext
*s
, bool is_signed
, unsigned int sf
,
5261 unsigned int rm
, unsigned int rn
, unsigned int rd
)
5263 TCGv_i64 tcg_n
, tcg_m
, tcg_rd
;
5264 tcg_rd
= cpu_reg(s
, rd
);
5266 if (!sf
&& is_signed
) {
5267 tcg_n
= tcg_temp_new_i64();
5268 tcg_m
= tcg_temp_new_i64();
5269 tcg_gen_ext32s_i64(tcg_n
, cpu_reg(s
, rn
));
5270 tcg_gen_ext32s_i64(tcg_m
, cpu_reg(s
, rm
));
5272 tcg_n
= read_cpu_reg(s
, rn
, sf
);
5273 tcg_m
= read_cpu_reg(s
, rm
, sf
);
5277 gen_helper_sdiv64(tcg_rd
, tcg_n
, tcg_m
);
5279 gen_helper_udiv64(tcg_rd
, tcg_n
, tcg_m
);
5282 if (!sf
) { /* zero extend final result */
5283 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
5287 /* LSLV, LSRV, ASRV, RORV */
5288 static void handle_shift_reg(DisasContext
*s
,
5289 enum a64_shift_type shift_type
, unsigned int sf
,
5290 unsigned int rm
, unsigned int rn
, unsigned int rd
)
5292 TCGv_i64 tcg_shift
= tcg_temp_new_i64();
5293 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
5294 TCGv_i64 tcg_rn
= read_cpu_reg(s
, rn
, sf
);
5296 tcg_gen_andi_i64(tcg_shift
, cpu_reg(s
, rm
), sf
? 63 : 31);
5297 shift_reg(tcg_rd
, tcg_rn
, sf
, shift_type
, tcg_shift
);
5300 /* CRC32[BHWX], CRC32C[BHWX] */
5301 static void handle_crc32(DisasContext
*s
,
5302 unsigned int sf
, unsigned int sz
, bool crc32c
,
5303 unsigned int rm
, unsigned int rn
, unsigned int rd
)
5305 TCGv_i64 tcg_acc
, tcg_val
;
5308 if (!dc_isar_feature(aa64_crc32
, s
)
5309 || (sf
== 1 && sz
!= 3)
5310 || (sf
== 0 && sz
== 3)) {
5311 unallocated_encoding(s
);
5316 tcg_val
= cpu_reg(s
, rm
);
5330 g_assert_not_reached();
5332 tcg_val
= tcg_temp_new_i64();
5333 tcg_gen_andi_i64(tcg_val
, cpu_reg(s
, rm
), mask
);
5336 tcg_acc
= cpu_reg(s
, rn
);
5337 tcg_bytes
= tcg_constant_i32(1 << sz
);
5340 gen_helper_crc32c_64(cpu_reg(s
, rd
), tcg_acc
, tcg_val
, tcg_bytes
);
5342 gen_helper_crc32_64(cpu_reg(s
, rd
), tcg_acc
, tcg_val
, tcg_bytes
);
5346 /* Data-processing (2 source)
5347 * 31 30 29 28 21 20 16 15 10 9 5 4 0
5348 * +----+---+---+-----------------+------+--------+------+------+
5349 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
5350 * +----+---+---+-----------------+------+--------+------+------+
5352 static void disas_data_proc_2src(DisasContext
*s
, uint32_t insn
)
5354 unsigned int sf
, rm
, opcode
, rn
, rd
, setflag
;
5355 sf
= extract32(insn
, 31, 1);
5356 setflag
= extract32(insn
, 29, 1);
5357 rm
= extract32(insn
, 16, 5);
5358 opcode
= extract32(insn
, 10, 6);
5359 rn
= extract32(insn
, 5, 5);
5360 rd
= extract32(insn
, 0, 5);
5362 if (setflag
&& opcode
!= 0) {
5363 unallocated_encoding(s
);
5368 case 0: /* SUBP(S) */
5369 if (sf
== 0 || !dc_isar_feature(aa64_mte_insn_reg
, s
)) {
5370 goto do_unallocated
;
5372 TCGv_i64 tcg_n
, tcg_m
, tcg_d
;
5374 tcg_n
= read_cpu_reg_sp(s
, rn
, true);
5375 tcg_m
= read_cpu_reg_sp(s
, rm
, true);
5376 tcg_gen_sextract_i64(tcg_n
, tcg_n
, 0, 56);
5377 tcg_gen_sextract_i64(tcg_m
, tcg_m
, 0, 56);
5378 tcg_d
= cpu_reg(s
, rd
);
5381 gen_sub_CC(true, tcg_d
, tcg_n
, tcg_m
);
5383 tcg_gen_sub_i64(tcg_d
, tcg_n
, tcg_m
);
5388 handle_div(s
, false, sf
, rm
, rn
, rd
);
5391 handle_div(s
, true, sf
, rm
, rn
, rd
);
5394 if (sf
== 0 || !dc_isar_feature(aa64_mte_insn_reg
, s
)) {
5395 goto do_unallocated
;
5398 gen_helper_irg(cpu_reg_sp(s
, rd
), cpu_env
,
5399 cpu_reg_sp(s
, rn
), cpu_reg(s
, rm
));
5401 gen_address_with_allocation_tag0(cpu_reg_sp(s
, rd
),
5406 if (sf
== 0 || !dc_isar_feature(aa64_mte_insn_reg
, s
)) {
5407 goto do_unallocated
;
5409 TCGv_i64 t
= tcg_temp_new_i64();
5411 tcg_gen_extract_i64(t
, cpu_reg_sp(s
, rn
), 56, 4);
5412 tcg_gen_shl_i64(t
, tcg_constant_i64(1), t
);
5413 tcg_gen_or_i64(cpu_reg(s
, rd
), cpu_reg(s
, rm
), t
);
5417 handle_shift_reg(s
, A64_SHIFT_TYPE_LSL
, sf
, rm
, rn
, rd
);
5420 handle_shift_reg(s
, A64_SHIFT_TYPE_LSR
, sf
, rm
, rn
, rd
);
5423 handle_shift_reg(s
, A64_SHIFT_TYPE_ASR
, sf
, rm
, rn
, rd
);
5426 handle_shift_reg(s
, A64_SHIFT_TYPE_ROR
, sf
, rm
, rn
, rd
);
5428 case 12: /* PACGA */
5429 if (sf
== 0 || !dc_isar_feature(aa64_pauth
, s
)) {
5430 goto do_unallocated
;
5432 gen_helper_pacga(cpu_reg(s
, rd
), cpu_env
,
5433 cpu_reg(s
, rn
), cpu_reg_sp(s
, rm
));
5442 case 23: /* CRC32 */
5444 int sz
= extract32(opcode
, 0, 2);
5445 bool crc32c
= extract32(opcode
, 2, 1);
5446 handle_crc32(s
, sf
, sz
, crc32c
, rm
, rn
, rd
);
5451 unallocated_encoding(s
);
5457 * Data processing - register
5458 * 31 30 29 28 25 21 20 16 10 0
5459 * +--+---+--+---+-------+-----+-------+-------+---------+
5460 * | |op0| |op1| 1 0 1 | op2 | | op3 | |
5461 * +--+---+--+---+-------+-----+-------+-------+---------+
5463 static void disas_data_proc_reg(DisasContext
*s
, uint32_t insn
)
5465 int op0
= extract32(insn
, 30, 1);
5466 int op1
= extract32(insn
, 28, 1);
5467 int op2
= extract32(insn
, 21, 4);
5468 int op3
= extract32(insn
, 10, 6);
5473 /* Add/sub (extended register) */
5474 disas_add_sub_ext_reg(s
, insn
);
5476 /* Add/sub (shifted register) */
5477 disas_add_sub_reg(s
, insn
);
5480 /* Logical (shifted register) */
5481 disas_logic_reg(s
, insn
);
5489 case 0x00: /* Add/subtract (with carry) */
5490 disas_adc_sbc(s
, insn
);
5493 case 0x01: /* Rotate right into flags */
5495 disas_rotate_right_into_flags(s
, insn
);
5498 case 0x02: /* Evaluate into flags */
5502 disas_evaluate_into_flags(s
, insn
);
5506 goto do_unallocated
;
5510 case 0x2: /* Conditional compare */
5511 disas_cc(s
, insn
); /* both imm and reg forms */
5514 case 0x4: /* Conditional select */
5515 disas_cond_select(s
, insn
);
5518 case 0x6: /* Data-processing */
5519 if (op0
) { /* (1 source) */
5520 disas_data_proc_1src(s
, insn
);
5521 } else { /* (2 source) */
5522 disas_data_proc_2src(s
, insn
);
5525 case 0x8 ... 0xf: /* (3 source) */
5526 disas_data_proc_3src(s
, insn
);
5531 unallocated_encoding(s
);
5536 static void handle_fp_compare(DisasContext
*s
, int size
,
5537 unsigned int rn
, unsigned int rm
,
5538 bool cmp_with_zero
, bool signal_all_nans
)
5540 TCGv_i64 tcg_flags
= tcg_temp_new_i64();
5541 TCGv_ptr fpst
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
5543 if (size
== MO_64
) {
5544 TCGv_i64 tcg_vn
, tcg_vm
;
5546 tcg_vn
= read_fp_dreg(s
, rn
);
5547 if (cmp_with_zero
) {
5548 tcg_vm
= tcg_constant_i64(0);
5550 tcg_vm
= read_fp_dreg(s
, rm
);
5552 if (signal_all_nans
) {
5553 gen_helper_vfp_cmped_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5555 gen_helper_vfp_cmpd_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5558 TCGv_i32 tcg_vn
= tcg_temp_new_i32();
5559 TCGv_i32 tcg_vm
= tcg_temp_new_i32();
5561 read_vec_element_i32(s
, tcg_vn
, rn
, 0, size
);
5562 if (cmp_with_zero
) {
5563 tcg_gen_movi_i32(tcg_vm
, 0);
5565 read_vec_element_i32(s
, tcg_vm
, rm
, 0, size
);
5570 if (signal_all_nans
) {
5571 gen_helper_vfp_cmpes_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5573 gen_helper_vfp_cmps_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5577 if (signal_all_nans
) {
5578 gen_helper_vfp_cmpeh_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5580 gen_helper_vfp_cmph_a64(tcg_flags
, tcg_vn
, tcg_vm
, fpst
);
5584 g_assert_not_reached();
5588 gen_set_nzcv(tcg_flags
);
5591 /* Floating point compare
5592 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
5593 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5594 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
5595 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5597 static void disas_fp_compare(DisasContext
*s
, uint32_t insn
)
5599 unsigned int mos
, type
, rm
, op
, rn
, opc
, op2r
;
5602 mos
= extract32(insn
, 29, 3);
5603 type
= extract32(insn
, 22, 2);
5604 rm
= extract32(insn
, 16, 5);
5605 op
= extract32(insn
, 14, 2);
5606 rn
= extract32(insn
, 5, 5);
5607 opc
= extract32(insn
, 3, 2);
5608 op2r
= extract32(insn
, 0, 3);
5610 if (mos
|| op
|| op2r
) {
5611 unallocated_encoding(s
);
5624 if (dc_isar_feature(aa64_fp16
, s
)) {
5629 unallocated_encoding(s
);
5633 if (!fp_access_check(s
)) {
5637 handle_fp_compare(s
, size
, rn
, rm
, opc
& 1, opc
& 2);
5640 /* Floating point conditional compare
5641 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
5642 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5643 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
5644 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5646 static void disas_fp_ccomp(DisasContext
*s
, uint32_t insn
)
5648 unsigned int mos
, type
, rm
, cond
, rn
, op
, nzcv
;
5649 TCGLabel
*label_continue
= NULL
;
5652 mos
= extract32(insn
, 29, 3);
5653 type
= extract32(insn
, 22, 2);
5654 rm
= extract32(insn
, 16, 5);
5655 cond
= extract32(insn
, 12, 4);
5656 rn
= extract32(insn
, 5, 5);
5657 op
= extract32(insn
, 4, 1);
5658 nzcv
= extract32(insn
, 0, 4);
5661 unallocated_encoding(s
);
5674 if (dc_isar_feature(aa64_fp16
, s
)) {
5679 unallocated_encoding(s
);
5683 if (!fp_access_check(s
)) {
5687 if (cond
< 0x0e) { /* not always */
5688 TCGLabel
*label_match
= gen_new_label();
5689 label_continue
= gen_new_label();
5690 arm_gen_test_cc(cond
, label_match
);
5692 gen_set_nzcv(tcg_constant_i64(nzcv
<< 28));
5693 tcg_gen_br(label_continue
);
5694 gen_set_label(label_match
);
5697 handle_fp_compare(s
, size
, rn
, rm
, false, op
);
5700 gen_set_label(label_continue
);
5704 /* Floating point conditional select
5705 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
5706 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5707 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
5708 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5710 static void disas_fp_csel(DisasContext
*s
, uint32_t insn
)
5712 unsigned int mos
, type
, rm
, cond
, rn
, rd
;
5713 TCGv_i64 t_true
, t_false
;
5717 mos
= extract32(insn
, 29, 3);
5718 type
= extract32(insn
, 22, 2);
5719 rm
= extract32(insn
, 16, 5);
5720 cond
= extract32(insn
, 12, 4);
5721 rn
= extract32(insn
, 5, 5);
5722 rd
= extract32(insn
, 0, 5);
5725 unallocated_encoding(s
);
5738 if (dc_isar_feature(aa64_fp16
, s
)) {
5743 unallocated_encoding(s
);
5747 if (!fp_access_check(s
)) {
5751 /* Zero extend sreg & hreg inputs to 64 bits now. */
5752 t_true
= tcg_temp_new_i64();
5753 t_false
= tcg_temp_new_i64();
5754 read_vec_element(s
, t_true
, rn
, 0, sz
);
5755 read_vec_element(s
, t_false
, rm
, 0, sz
);
5757 a64_test_cc(&c
, cond
);
5758 tcg_gen_movcond_i64(c
.cond
, t_true
, c
.value
, tcg_constant_i64(0),
5761 /* Note that sregs & hregs write back zeros to the high bits,
5762 and we've already done the zero-extension. */
5763 write_fp_dreg(s
, rd
, t_true
);
5766 /* Floating-point data-processing (1 source) - half precision */
5767 static void handle_fp_1src_half(DisasContext
*s
, int opcode
, int rd
, int rn
)
5769 TCGv_ptr fpst
= NULL
;
5770 TCGv_i32 tcg_op
= read_fp_hreg(s
, rn
);
5771 TCGv_i32 tcg_res
= tcg_temp_new_i32();
5774 case 0x0: /* FMOV */
5775 tcg_gen_mov_i32(tcg_res
, tcg_op
);
5777 case 0x1: /* FABS */
5778 tcg_gen_andi_i32(tcg_res
, tcg_op
, 0x7fff);
5780 case 0x2: /* FNEG */
5781 tcg_gen_xori_i32(tcg_res
, tcg_op
, 0x8000);
5783 case 0x3: /* FSQRT */
5784 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
5785 gen_helper_sqrt_f16(tcg_res
, tcg_op
, fpst
);
5787 case 0x8: /* FRINTN */
5788 case 0x9: /* FRINTP */
5789 case 0xa: /* FRINTM */
5790 case 0xb: /* FRINTZ */
5791 case 0xc: /* FRINTA */
5795 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
5796 tcg_rmode
= gen_set_rmode(opcode
& 7, fpst
);
5797 gen_helper_advsimd_rinth(tcg_res
, tcg_op
, fpst
);
5798 gen_restore_rmode(tcg_rmode
, fpst
);
5801 case 0xe: /* FRINTX */
5802 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
5803 gen_helper_advsimd_rinth_exact(tcg_res
, tcg_op
, fpst
);
5805 case 0xf: /* FRINTI */
5806 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
5807 gen_helper_advsimd_rinth(tcg_res
, tcg_op
, fpst
);
5810 g_assert_not_reached();
5813 write_fp_sreg(s
, rd
, tcg_res
);
5816 /* Floating-point data-processing (1 source) - single precision */
5817 static void handle_fp_1src_single(DisasContext
*s
, int opcode
, int rd
, int rn
)
5819 void (*gen_fpst
)(TCGv_i32
, TCGv_i32
, TCGv_ptr
);
5820 TCGv_i32 tcg_op
, tcg_res
;
5824 tcg_op
= read_fp_sreg(s
, rn
);
5825 tcg_res
= tcg_temp_new_i32();
5828 case 0x0: /* FMOV */
5829 tcg_gen_mov_i32(tcg_res
, tcg_op
);
5831 case 0x1: /* FABS */
5832 gen_helper_vfp_abss(tcg_res
, tcg_op
);
5834 case 0x2: /* FNEG */
5835 gen_helper_vfp_negs(tcg_res
, tcg_op
);
5837 case 0x3: /* FSQRT */
5838 gen_helper_vfp_sqrts(tcg_res
, tcg_op
, cpu_env
);
5840 case 0x6: /* BFCVT */
5841 gen_fpst
= gen_helper_bfcvt
;
5843 case 0x8: /* FRINTN */
5844 case 0x9: /* FRINTP */
5845 case 0xa: /* FRINTM */
5846 case 0xb: /* FRINTZ */
5847 case 0xc: /* FRINTA */
5849 gen_fpst
= gen_helper_rints
;
5851 case 0xe: /* FRINTX */
5852 gen_fpst
= gen_helper_rints_exact
;
5854 case 0xf: /* FRINTI */
5855 gen_fpst
= gen_helper_rints
;
5857 case 0x10: /* FRINT32Z */
5858 rmode
= FPROUNDING_ZERO
;
5859 gen_fpst
= gen_helper_frint32_s
;
5861 case 0x11: /* FRINT32X */
5862 gen_fpst
= gen_helper_frint32_s
;
5864 case 0x12: /* FRINT64Z */
5865 rmode
= FPROUNDING_ZERO
;
5866 gen_fpst
= gen_helper_frint64_s
;
5868 case 0x13: /* FRINT64X */
5869 gen_fpst
= gen_helper_frint64_s
;
5872 g_assert_not_reached();
5875 fpst
= fpstatus_ptr(FPST_FPCR
);
5877 TCGv_i32 tcg_rmode
= gen_set_rmode(rmode
, fpst
);
5878 gen_fpst(tcg_res
, tcg_op
, fpst
);
5879 gen_restore_rmode(tcg_rmode
, fpst
);
5881 gen_fpst(tcg_res
, tcg_op
, fpst
);
5885 write_fp_sreg(s
, rd
, tcg_res
);
5888 /* Floating-point data-processing (1 source) - double precision */
5889 static void handle_fp_1src_double(DisasContext
*s
, int opcode
, int rd
, int rn
)
5891 void (*gen_fpst
)(TCGv_i64
, TCGv_i64
, TCGv_ptr
);
5892 TCGv_i64 tcg_op
, tcg_res
;
5897 case 0x0: /* FMOV */
5898 gen_gvec_fn2(s
, false, rd
, rn
, tcg_gen_gvec_mov
, 0);
5902 tcg_op
= read_fp_dreg(s
, rn
);
5903 tcg_res
= tcg_temp_new_i64();
5906 case 0x1: /* FABS */
5907 gen_helper_vfp_absd(tcg_res
, tcg_op
);
5909 case 0x2: /* FNEG */
5910 gen_helper_vfp_negd(tcg_res
, tcg_op
);
5912 case 0x3: /* FSQRT */
5913 gen_helper_vfp_sqrtd(tcg_res
, tcg_op
, cpu_env
);
5915 case 0x8: /* FRINTN */
5916 case 0x9: /* FRINTP */
5917 case 0xa: /* FRINTM */
5918 case 0xb: /* FRINTZ */
5919 case 0xc: /* FRINTA */
5921 gen_fpst
= gen_helper_rintd
;
5923 case 0xe: /* FRINTX */
5924 gen_fpst
= gen_helper_rintd_exact
;
5926 case 0xf: /* FRINTI */
5927 gen_fpst
= gen_helper_rintd
;
5929 case 0x10: /* FRINT32Z */
5930 rmode
= FPROUNDING_ZERO
;
5931 gen_fpst
= gen_helper_frint32_d
;
5933 case 0x11: /* FRINT32X */
5934 gen_fpst
= gen_helper_frint32_d
;
5936 case 0x12: /* FRINT64Z */
5937 rmode
= FPROUNDING_ZERO
;
5938 gen_fpst
= gen_helper_frint64_d
;
5940 case 0x13: /* FRINT64X */
5941 gen_fpst
= gen_helper_frint64_d
;
5944 g_assert_not_reached();
5947 fpst
= fpstatus_ptr(FPST_FPCR
);
5949 TCGv_i32 tcg_rmode
= gen_set_rmode(rmode
, fpst
);
5950 gen_fpst(tcg_res
, tcg_op
, fpst
);
5951 gen_restore_rmode(tcg_rmode
, fpst
);
5953 gen_fpst(tcg_res
, tcg_op
, fpst
);
5957 write_fp_dreg(s
, rd
, tcg_res
);
5960 static void handle_fp_fcvt(DisasContext
*s
, int opcode
,
5961 int rd
, int rn
, int dtype
, int ntype
)
5966 TCGv_i32 tcg_rn
= read_fp_sreg(s
, rn
);
5968 /* Single to double */
5969 TCGv_i64 tcg_rd
= tcg_temp_new_i64();
5970 gen_helper_vfp_fcvtds(tcg_rd
, tcg_rn
, cpu_env
);
5971 write_fp_dreg(s
, rd
, tcg_rd
);
5973 /* Single to half */
5974 TCGv_i32 tcg_rd
= tcg_temp_new_i32();
5975 TCGv_i32 ahp
= get_ahp_flag();
5976 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
5978 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd
, tcg_rn
, fpst
, ahp
);
5979 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5980 write_fp_sreg(s
, rd
, tcg_rd
);
5986 TCGv_i64 tcg_rn
= read_fp_dreg(s
, rn
);
5987 TCGv_i32 tcg_rd
= tcg_temp_new_i32();
5989 /* Double to single */
5990 gen_helper_vfp_fcvtsd(tcg_rd
, tcg_rn
, cpu_env
);
5992 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
5993 TCGv_i32 ahp
= get_ahp_flag();
5994 /* Double to half */
5995 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd
, tcg_rn
, fpst
, ahp
);
5996 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5998 write_fp_sreg(s
, rd
, tcg_rd
);
6003 TCGv_i32 tcg_rn
= read_fp_sreg(s
, rn
);
6004 TCGv_ptr tcg_fpst
= fpstatus_ptr(FPST_FPCR
);
6005 TCGv_i32 tcg_ahp
= get_ahp_flag();
6006 tcg_gen_ext16u_i32(tcg_rn
, tcg_rn
);
6008 /* Half to single */
6009 TCGv_i32 tcg_rd
= tcg_temp_new_i32();
6010 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd
, tcg_rn
, tcg_fpst
, tcg_ahp
);
6011 write_fp_sreg(s
, rd
, tcg_rd
);
6013 /* Half to double */
6014 TCGv_i64 tcg_rd
= tcg_temp_new_i64();
6015 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd
, tcg_rn
, tcg_fpst
, tcg_ahp
);
6016 write_fp_dreg(s
, rd
, tcg_rd
);
6021 g_assert_not_reached();
6025 /* Floating point data-processing (1 source)
6026 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
6027 * +---+---+---+-----------+------+---+--------+-----------+------+------+
6028 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
6029 * +---+---+---+-----------+------+---+--------+-----------+------+------+
6031 static void disas_fp_1src(DisasContext
*s
, uint32_t insn
)
6033 int mos
= extract32(insn
, 29, 3);
6034 int type
= extract32(insn
, 22, 2);
6035 int opcode
= extract32(insn
, 15, 6);
6036 int rn
= extract32(insn
, 5, 5);
6037 int rd
= extract32(insn
, 0, 5);
6040 goto do_unallocated
;
6044 case 0x4: case 0x5: case 0x7:
6046 /* FCVT between half, single and double precision */
6047 int dtype
= extract32(opcode
, 0, 2);
6048 if (type
== 2 || dtype
== type
) {
6049 goto do_unallocated
;
6051 if (!fp_access_check(s
)) {
6055 handle_fp_fcvt(s
, opcode
, rd
, rn
, dtype
, type
);
6059 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6060 if (type
> 1 || !dc_isar_feature(aa64_frint
, s
)) {
6061 goto do_unallocated
;
6067 /* 32-to-32 and 64-to-64 ops */
6070 if (!fp_access_check(s
)) {
6073 handle_fp_1src_single(s
, opcode
, rd
, rn
);
6076 if (!fp_access_check(s
)) {
6079 handle_fp_1src_double(s
, opcode
, rd
, rn
);
6082 if (!dc_isar_feature(aa64_fp16
, s
)) {
6083 goto do_unallocated
;
6086 if (!fp_access_check(s
)) {
6089 handle_fp_1src_half(s
, opcode
, rd
, rn
);
6092 goto do_unallocated
;
6099 if (!dc_isar_feature(aa64_bf16
, s
)) {
6100 goto do_unallocated
;
6102 if (!fp_access_check(s
)) {
6105 handle_fp_1src_single(s
, opcode
, rd
, rn
);
6108 goto do_unallocated
;
6114 unallocated_encoding(s
);
6119 /* Floating-point data-processing (2 source) - single precision */
6120 static void handle_fp_2src_single(DisasContext
*s
, int opcode
,
6121 int rd
, int rn
, int rm
)
6128 tcg_res
= tcg_temp_new_i32();
6129 fpst
= fpstatus_ptr(FPST_FPCR
);
6130 tcg_op1
= read_fp_sreg(s
, rn
);
6131 tcg_op2
= read_fp_sreg(s
, rm
);
6134 case 0x0: /* FMUL */
6135 gen_helper_vfp_muls(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6137 case 0x1: /* FDIV */
6138 gen_helper_vfp_divs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6140 case 0x2: /* FADD */
6141 gen_helper_vfp_adds(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6143 case 0x3: /* FSUB */
6144 gen_helper_vfp_subs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6146 case 0x4: /* FMAX */
6147 gen_helper_vfp_maxs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6149 case 0x5: /* FMIN */
6150 gen_helper_vfp_mins(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6152 case 0x6: /* FMAXNM */
6153 gen_helper_vfp_maxnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6155 case 0x7: /* FMINNM */
6156 gen_helper_vfp_minnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6158 case 0x8: /* FNMUL */
6159 gen_helper_vfp_muls(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6160 gen_helper_vfp_negs(tcg_res
, tcg_res
);
6164 write_fp_sreg(s
, rd
, tcg_res
);
6167 /* Floating-point data-processing (2 source) - double precision */
6168 static void handle_fp_2src_double(DisasContext
*s
, int opcode
,
6169 int rd
, int rn
, int rm
)
6176 tcg_res
= tcg_temp_new_i64();
6177 fpst
= fpstatus_ptr(FPST_FPCR
);
6178 tcg_op1
= read_fp_dreg(s
, rn
);
6179 tcg_op2
= read_fp_dreg(s
, rm
);
6182 case 0x0: /* FMUL */
6183 gen_helper_vfp_muld(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6185 case 0x1: /* FDIV */
6186 gen_helper_vfp_divd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6188 case 0x2: /* FADD */
6189 gen_helper_vfp_addd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6191 case 0x3: /* FSUB */
6192 gen_helper_vfp_subd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6194 case 0x4: /* FMAX */
6195 gen_helper_vfp_maxd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6197 case 0x5: /* FMIN */
6198 gen_helper_vfp_mind(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6200 case 0x6: /* FMAXNM */
6201 gen_helper_vfp_maxnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6203 case 0x7: /* FMINNM */
6204 gen_helper_vfp_minnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6206 case 0x8: /* FNMUL */
6207 gen_helper_vfp_muld(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6208 gen_helper_vfp_negd(tcg_res
, tcg_res
);
6212 write_fp_dreg(s
, rd
, tcg_res
);
6215 /* Floating-point data-processing (2 source) - half precision */
6216 static void handle_fp_2src_half(DisasContext
*s
, int opcode
,
6217 int rd
, int rn
, int rm
)
6224 tcg_res
= tcg_temp_new_i32();
6225 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
6226 tcg_op1
= read_fp_hreg(s
, rn
);
6227 tcg_op2
= read_fp_hreg(s
, rm
);
6230 case 0x0: /* FMUL */
6231 gen_helper_advsimd_mulh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6233 case 0x1: /* FDIV */
6234 gen_helper_advsimd_divh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6236 case 0x2: /* FADD */
6237 gen_helper_advsimd_addh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6239 case 0x3: /* FSUB */
6240 gen_helper_advsimd_subh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6242 case 0x4: /* FMAX */
6243 gen_helper_advsimd_maxh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6245 case 0x5: /* FMIN */
6246 gen_helper_advsimd_minh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6248 case 0x6: /* FMAXNM */
6249 gen_helper_advsimd_maxnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6251 case 0x7: /* FMINNM */
6252 gen_helper_advsimd_minnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6254 case 0x8: /* FNMUL */
6255 gen_helper_advsimd_mulh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
6256 tcg_gen_xori_i32(tcg_res
, tcg_res
, 0x8000);
6259 g_assert_not_reached();
6262 write_fp_sreg(s
, rd
, tcg_res
);
6265 /* Floating point data-processing (2 source)
6266 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6267 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6268 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
6269 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6271 static void disas_fp_2src(DisasContext
*s
, uint32_t insn
)
6273 int mos
= extract32(insn
, 29, 3);
6274 int type
= extract32(insn
, 22, 2);
6275 int rd
= extract32(insn
, 0, 5);
6276 int rn
= extract32(insn
, 5, 5);
6277 int rm
= extract32(insn
, 16, 5);
6278 int opcode
= extract32(insn
, 12, 4);
6280 if (opcode
> 8 || mos
) {
6281 unallocated_encoding(s
);
6287 if (!fp_access_check(s
)) {
6290 handle_fp_2src_single(s
, opcode
, rd
, rn
, rm
);
6293 if (!fp_access_check(s
)) {
6296 handle_fp_2src_double(s
, opcode
, rd
, rn
, rm
);
6299 if (!dc_isar_feature(aa64_fp16
, s
)) {
6300 unallocated_encoding(s
);
6303 if (!fp_access_check(s
)) {
6306 handle_fp_2src_half(s
, opcode
, rd
, rn
, rm
);
6309 unallocated_encoding(s
);
6313 /* Floating-point data-processing (3 source) - single precision */
6314 static void handle_fp_3src_single(DisasContext
*s
, bool o0
, bool o1
,
6315 int rd
, int rn
, int rm
, int ra
)
6317 TCGv_i32 tcg_op1
, tcg_op2
, tcg_op3
;
6318 TCGv_i32 tcg_res
= tcg_temp_new_i32();
6319 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
6321 tcg_op1
= read_fp_sreg(s
, rn
);
6322 tcg_op2
= read_fp_sreg(s
, rm
);
6323 tcg_op3
= read_fp_sreg(s
, ra
);
6325 /* These are fused multiply-add, and must be done as one
6326 * floating point operation with no rounding between the
6327 * multiplication and addition steps.
6328 * NB that doing the negations here as separate steps is
6329 * correct : an input NaN should come out with its sign bit
6330 * flipped if it is a negated-input.
6333 gen_helper_vfp_negs(tcg_op3
, tcg_op3
);
6337 gen_helper_vfp_negs(tcg_op1
, tcg_op1
);
6340 gen_helper_vfp_muladds(tcg_res
, tcg_op1
, tcg_op2
, tcg_op3
, fpst
);
6342 write_fp_sreg(s
, rd
, tcg_res
);
6345 /* Floating-point data-processing (3 source) - double precision */
6346 static void handle_fp_3src_double(DisasContext
*s
, bool o0
, bool o1
,
6347 int rd
, int rn
, int rm
, int ra
)
6349 TCGv_i64 tcg_op1
, tcg_op2
, tcg_op3
;
6350 TCGv_i64 tcg_res
= tcg_temp_new_i64();
6351 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
6353 tcg_op1
= read_fp_dreg(s
, rn
);
6354 tcg_op2
= read_fp_dreg(s
, rm
);
6355 tcg_op3
= read_fp_dreg(s
, ra
);
6357 /* These are fused multiply-add, and must be done as one
6358 * floating point operation with no rounding between the
6359 * multiplication and addition steps.
6360 * NB that doing the negations here as separate steps is
6361 * correct : an input NaN should come out with its sign bit
6362 * flipped if it is a negated-input.
6365 gen_helper_vfp_negd(tcg_op3
, tcg_op3
);
6369 gen_helper_vfp_negd(tcg_op1
, tcg_op1
);
6372 gen_helper_vfp_muladdd(tcg_res
, tcg_op1
, tcg_op2
, tcg_op3
, fpst
);
6374 write_fp_dreg(s
, rd
, tcg_res
);
6377 /* Floating-point data-processing (3 source) - half precision */
6378 static void handle_fp_3src_half(DisasContext
*s
, bool o0
, bool o1
,
6379 int rd
, int rn
, int rm
, int ra
)
6381 TCGv_i32 tcg_op1
, tcg_op2
, tcg_op3
;
6382 TCGv_i32 tcg_res
= tcg_temp_new_i32();
6383 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR_F16
);
6385 tcg_op1
= read_fp_hreg(s
, rn
);
6386 tcg_op2
= read_fp_hreg(s
, rm
);
6387 tcg_op3
= read_fp_hreg(s
, ra
);
6389 /* These are fused multiply-add, and must be done as one
6390 * floating point operation with no rounding between the
6391 * multiplication and addition steps.
6392 * NB that doing the negations here as separate steps is
6393 * correct : an input NaN should come out with its sign bit
6394 * flipped if it is a negated-input.
6397 tcg_gen_xori_i32(tcg_op3
, tcg_op3
, 0x8000);
6401 tcg_gen_xori_i32(tcg_op1
, tcg_op1
, 0x8000);
6404 gen_helper_advsimd_muladdh(tcg_res
, tcg_op1
, tcg_op2
, tcg_op3
, fpst
);
6406 write_fp_sreg(s
, rd
, tcg_res
);
6409 /* Floating point data-processing (3 source)
6410 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
6411 * +---+---+---+-----------+------+----+------+----+------+------+------+
6412 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
6413 * +---+---+---+-----------+------+----+------+----+------+------+------+
6415 static void disas_fp_3src(DisasContext
*s
, uint32_t insn
)
6417 int mos
= extract32(insn
, 29, 3);
6418 int type
= extract32(insn
, 22, 2);
6419 int rd
= extract32(insn
, 0, 5);
6420 int rn
= extract32(insn
, 5, 5);
6421 int ra
= extract32(insn
, 10, 5);
6422 int rm
= extract32(insn
, 16, 5);
6423 bool o0
= extract32(insn
, 15, 1);
6424 bool o1
= extract32(insn
, 21, 1);
6427 unallocated_encoding(s
);
6433 if (!fp_access_check(s
)) {
6436 handle_fp_3src_single(s
, o0
, o1
, rd
, rn
, rm
, ra
);
6439 if (!fp_access_check(s
)) {
6442 handle_fp_3src_double(s
, o0
, o1
, rd
, rn
, rm
, ra
);
6445 if (!dc_isar_feature(aa64_fp16
, s
)) {
6446 unallocated_encoding(s
);
6449 if (!fp_access_check(s
)) {
6452 handle_fp_3src_half(s
, o0
, o1
, rd
, rn
, rm
, ra
);
6455 unallocated_encoding(s
);
6459 /* Floating point immediate
6460 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
6461 * +---+---+---+-----------+------+---+------------+-------+------+------+
6462 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
6463 * +---+---+---+-----------+------+---+------------+-------+------+------+
6465 static void disas_fp_imm(DisasContext
*s
, uint32_t insn
)
6467 int rd
= extract32(insn
, 0, 5);
6468 int imm5
= extract32(insn
, 5, 5);
6469 int imm8
= extract32(insn
, 13, 8);
6470 int type
= extract32(insn
, 22, 2);
6471 int mos
= extract32(insn
, 29, 3);
6476 unallocated_encoding(s
);
6489 if (dc_isar_feature(aa64_fp16
, s
)) {
6494 unallocated_encoding(s
);
6498 if (!fp_access_check(s
)) {
6502 imm
= vfp_expand_imm(sz
, imm8
);
6503 write_fp_dreg(s
, rd
, tcg_constant_i64(imm
));
6506 /* Handle floating point <=> fixed point conversions. Note that we can
6507 * also deal with fp <=> integer conversions as a special case (scale == 64)
6508 * OPTME: consider handling that special case specially or at least skipping
6509 * the call to scalbn in the helpers for zero shifts.
6511 static void handle_fpfpcvt(DisasContext
*s
, int rd
, int rn
, int opcode
,
6512 bool itof
, int rmode
, int scale
, int sf
, int type
)
6514 bool is_signed
= !(opcode
& 1);
6515 TCGv_ptr tcg_fpstatus
;
6516 TCGv_i32 tcg_shift
, tcg_single
;
6517 TCGv_i64 tcg_double
;
6519 tcg_fpstatus
= fpstatus_ptr(type
== 3 ? FPST_FPCR_F16
: FPST_FPCR
);
6521 tcg_shift
= tcg_constant_i32(64 - scale
);
6524 TCGv_i64 tcg_int
= cpu_reg(s
, rn
);
6526 TCGv_i64 tcg_extend
= tcg_temp_new_i64();
6529 tcg_gen_ext32s_i64(tcg_extend
, tcg_int
);
6531 tcg_gen_ext32u_i64(tcg_extend
, tcg_int
);
6534 tcg_int
= tcg_extend
;
6538 case 1: /* float64 */
6539 tcg_double
= tcg_temp_new_i64();
6541 gen_helper_vfp_sqtod(tcg_double
, tcg_int
,
6542 tcg_shift
, tcg_fpstatus
);
6544 gen_helper_vfp_uqtod(tcg_double
, tcg_int
,
6545 tcg_shift
, tcg_fpstatus
);
6547 write_fp_dreg(s
, rd
, tcg_double
);
6550 case 0: /* float32 */
6551 tcg_single
= tcg_temp_new_i32();
6553 gen_helper_vfp_sqtos(tcg_single
, tcg_int
,
6554 tcg_shift
, tcg_fpstatus
);
6556 gen_helper_vfp_uqtos(tcg_single
, tcg_int
,
6557 tcg_shift
, tcg_fpstatus
);
6559 write_fp_sreg(s
, rd
, tcg_single
);
6562 case 3: /* float16 */
6563 tcg_single
= tcg_temp_new_i32();
6565 gen_helper_vfp_sqtoh(tcg_single
, tcg_int
,
6566 tcg_shift
, tcg_fpstatus
);
6568 gen_helper_vfp_uqtoh(tcg_single
, tcg_int
,
6569 tcg_shift
, tcg_fpstatus
);
6571 write_fp_sreg(s
, rd
, tcg_single
);
6575 g_assert_not_reached();
6578 TCGv_i64 tcg_int
= cpu_reg(s
, rd
);
6581 if (extract32(opcode
, 2, 1)) {
6582 /* There are too many rounding modes to all fit into rmode,
6583 * so FCVTA[US] is a special case.
6585 rmode
= FPROUNDING_TIEAWAY
;
6588 tcg_rmode
= gen_set_rmode(rmode
, tcg_fpstatus
);
6591 case 1: /* float64 */
6592 tcg_double
= read_fp_dreg(s
, rn
);
6595 gen_helper_vfp_tosld(tcg_int
, tcg_double
,
6596 tcg_shift
, tcg_fpstatus
);
6598 gen_helper_vfp_tosqd(tcg_int
, tcg_double
,
6599 tcg_shift
, tcg_fpstatus
);
6603 gen_helper_vfp_tould(tcg_int
, tcg_double
,
6604 tcg_shift
, tcg_fpstatus
);
6606 gen_helper_vfp_touqd(tcg_int
, tcg_double
,
6607 tcg_shift
, tcg_fpstatus
);
6611 tcg_gen_ext32u_i64(tcg_int
, tcg_int
);
6615 case 0: /* float32 */
6616 tcg_single
= read_fp_sreg(s
, rn
);
6619 gen_helper_vfp_tosqs(tcg_int
, tcg_single
,
6620 tcg_shift
, tcg_fpstatus
);
6622 gen_helper_vfp_touqs(tcg_int
, tcg_single
,
6623 tcg_shift
, tcg_fpstatus
);
6626 TCGv_i32 tcg_dest
= tcg_temp_new_i32();
6628 gen_helper_vfp_tosls(tcg_dest
, tcg_single
,
6629 tcg_shift
, tcg_fpstatus
);
6631 gen_helper_vfp_touls(tcg_dest
, tcg_single
,
6632 tcg_shift
, tcg_fpstatus
);
6634 tcg_gen_extu_i32_i64(tcg_int
, tcg_dest
);
6638 case 3: /* float16 */
6639 tcg_single
= read_fp_sreg(s
, rn
);
6642 gen_helper_vfp_tosqh(tcg_int
, tcg_single
,
6643 tcg_shift
, tcg_fpstatus
);
6645 gen_helper_vfp_touqh(tcg_int
, tcg_single
,
6646 tcg_shift
, tcg_fpstatus
);
6649 TCGv_i32 tcg_dest
= tcg_temp_new_i32();
6651 gen_helper_vfp_toslh(tcg_dest
, tcg_single
,
6652 tcg_shift
, tcg_fpstatus
);
6654 gen_helper_vfp_toulh(tcg_dest
, tcg_single
,
6655 tcg_shift
, tcg_fpstatus
);
6657 tcg_gen_extu_i32_i64(tcg_int
, tcg_dest
);
6662 g_assert_not_reached();
6665 gen_restore_rmode(tcg_rmode
, tcg_fpstatus
);
6669 /* Floating point <-> fixed point conversions
6670 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6671 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6672 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
6673 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6675 static void disas_fp_fixed_conv(DisasContext
*s
, uint32_t insn
)
6677 int rd
= extract32(insn
, 0, 5);
6678 int rn
= extract32(insn
, 5, 5);
6679 int scale
= extract32(insn
, 10, 6);
6680 int opcode
= extract32(insn
, 16, 3);
6681 int rmode
= extract32(insn
, 19, 2);
6682 int type
= extract32(insn
, 22, 2);
6683 bool sbit
= extract32(insn
, 29, 1);
6684 bool sf
= extract32(insn
, 31, 1);
6687 if (sbit
|| (!sf
&& scale
< 32)) {
6688 unallocated_encoding(s
);
6693 case 0: /* float32 */
6694 case 1: /* float64 */
6696 case 3: /* float16 */
6697 if (dc_isar_feature(aa64_fp16
, s
)) {
6702 unallocated_encoding(s
);
6706 switch ((rmode
<< 3) | opcode
) {
6707 case 0x2: /* SCVTF */
6708 case 0x3: /* UCVTF */
6711 case 0x18: /* FCVTZS */
6712 case 0x19: /* FCVTZU */
6716 unallocated_encoding(s
);
6720 if (!fp_access_check(s
)) {
6724 handle_fpfpcvt(s
, rd
, rn
, opcode
, itof
, FPROUNDING_ZERO
, scale
, sf
, type
);
6727 static void handle_fmov(DisasContext
*s
, int rd
, int rn
, int type
, bool itof
)
6729 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6730 * without conversion.
6734 TCGv_i64 tcg_rn
= cpu_reg(s
, rn
);
6740 tmp
= tcg_temp_new_i64();
6741 tcg_gen_ext32u_i64(tmp
, tcg_rn
);
6742 write_fp_dreg(s
, rd
, tmp
);
6746 write_fp_dreg(s
, rd
, tcg_rn
);
6749 /* 64 bit to top half. */
6750 tcg_gen_st_i64(tcg_rn
, cpu_env
, fp_reg_hi_offset(s
, rd
));
6751 clear_vec_high(s
, true, rd
);
6755 tmp
= tcg_temp_new_i64();
6756 tcg_gen_ext16u_i64(tmp
, tcg_rn
);
6757 write_fp_dreg(s
, rd
, tmp
);
6760 g_assert_not_reached();
6763 TCGv_i64 tcg_rd
= cpu_reg(s
, rd
);
6768 tcg_gen_ld32u_i64(tcg_rd
, cpu_env
, fp_reg_offset(s
, rn
, MO_32
));
6772 tcg_gen_ld_i64(tcg_rd
, cpu_env
, fp_reg_offset(s
, rn
, MO_64
));
6775 /* 64 bits from top half */
6776 tcg_gen_ld_i64(tcg_rd
, cpu_env
, fp_reg_hi_offset(s
, rn
));
6780 tcg_gen_ld16u_i64(tcg_rd
, cpu_env
, fp_reg_offset(s
, rn
, MO_16
));
6783 g_assert_not_reached();
6788 static void handle_fjcvtzs(DisasContext
*s
, int rd
, int rn
)
6790 TCGv_i64 t
= read_fp_dreg(s
, rn
);
6791 TCGv_ptr fpstatus
= fpstatus_ptr(FPST_FPCR
);
6793 gen_helper_fjcvtzs(t
, t
, fpstatus
);
6795 tcg_gen_ext32u_i64(cpu_reg(s
, rd
), t
);
6796 tcg_gen_extrh_i64_i32(cpu_ZF
, t
);
6797 tcg_gen_movi_i32(cpu_CF
, 0);
6798 tcg_gen_movi_i32(cpu_NF
, 0);
6799 tcg_gen_movi_i32(cpu_VF
, 0);
6802 /* Floating point <-> integer conversions
6803 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
6804 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6805 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6806 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6808 static void disas_fp_int_conv(DisasContext
*s
, uint32_t insn
)
6810 int rd
= extract32(insn
, 0, 5);
6811 int rn
= extract32(insn
, 5, 5);
6812 int opcode
= extract32(insn
, 16, 3);
6813 int rmode
= extract32(insn
, 19, 2);
6814 int type
= extract32(insn
, 22, 2);
6815 bool sbit
= extract32(insn
, 29, 1);
6816 bool sf
= extract32(insn
, 31, 1);
6820 goto do_unallocated
;
6828 case 4: /* FCVTAS */
6829 case 5: /* FCVTAU */
6831 goto do_unallocated
;
6834 case 0: /* FCVT[NPMZ]S */
6835 case 1: /* FCVT[NPMZ]U */
6837 case 0: /* float32 */
6838 case 1: /* float64 */
6840 case 3: /* float16 */
6841 if (!dc_isar_feature(aa64_fp16
, s
)) {
6842 goto do_unallocated
;
6846 goto do_unallocated
;
6848 if (!fp_access_check(s
)) {
6851 handle_fpfpcvt(s
, rd
, rn
, opcode
, itof
, rmode
, 64, sf
, type
);
6855 switch (sf
<< 7 | type
<< 5 | rmode
<< 3 | opcode
) {
6856 case 0b01100110: /* FMOV half <-> 32-bit int */
6858 case 0b11100110: /* FMOV half <-> 64-bit int */
6860 if (!dc_isar_feature(aa64_fp16
, s
)) {
6861 goto do_unallocated
;
6864 case 0b00000110: /* FMOV 32-bit */
6866 case 0b10100110: /* FMOV 64-bit */
6868 case 0b11001110: /* FMOV top half of 128-bit */
6870 if (!fp_access_check(s
)) {
6874 handle_fmov(s
, rd
, rn
, type
, itof
);
6877 case 0b00111110: /* FJCVTZS */
6878 if (!dc_isar_feature(aa64_jscvt
, s
)) {
6879 goto do_unallocated
;
6880 } else if (fp_access_check(s
)) {
6881 handle_fjcvtzs(s
, rd
, rn
);
6887 unallocated_encoding(s
);
6894 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6895 * 31 30 29 28 25 24 0
6896 * +---+---+---+---------+-----------------------------+
6897 * | | 0 | | 1 1 1 1 | |
6898 * +---+---+---+---------+-----------------------------+
6900 static void disas_data_proc_fp(DisasContext
*s
, uint32_t insn
)
6902 if (extract32(insn
, 24, 1)) {
6903 /* Floating point data-processing (3 source) */
6904 disas_fp_3src(s
, insn
);
6905 } else if (extract32(insn
, 21, 1) == 0) {
6906 /* Floating point to fixed point conversions */
6907 disas_fp_fixed_conv(s
, insn
);
6909 switch (extract32(insn
, 10, 2)) {
6911 /* Floating point conditional compare */
6912 disas_fp_ccomp(s
, insn
);
6915 /* Floating point data-processing (2 source) */
6916 disas_fp_2src(s
, insn
);
6919 /* Floating point conditional select */
6920 disas_fp_csel(s
, insn
);
6923 switch (ctz32(extract32(insn
, 12, 4))) {
6924 case 0: /* [15:12] == xxx1 */
6925 /* Floating point immediate */
6926 disas_fp_imm(s
, insn
);
6928 case 1: /* [15:12] == xx10 */
6929 /* Floating point compare */
6930 disas_fp_compare(s
, insn
);
6932 case 2: /* [15:12] == x100 */
6933 /* Floating point data-processing (1 source) */
6934 disas_fp_1src(s
, insn
);
6936 case 3: /* [15:12] == 1000 */
6937 unallocated_encoding(s
);
6939 default: /* [15:12] == 0000 */
6940 /* Floating point <-> integer conversions */
6941 disas_fp_int_conv(s
, insn
);
6949 static void do_ext64(DisasContext
*s
, TCGv_i64 tcg_left
, TCGv_i64 tcg_right
,
6952 /* Extract 64 bits from the middle of two concatenated 64 bit
6953 * vector register slices left:right. The extracted bits start
6954 * at 'pos' bits into the right (least significant) side.
6955 * We return the result in tcg_right, and guarantee not to
6958 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
6959 assert(pos
> 0 && pos
< 64);
6961 tcg_gen_shri_i64(tcg_right
, tcg_right
, pos
);
6962 tcg_gen_shli_i64(tcg_tmp
, tcg_left
, 64 - pos
);
6963 tcg_gen_or_i64(tcg_right
, tcg_right
, tcg_tmp
);
6967 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
6968 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6969 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
6970 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6972 static void disas_simd_ext(DisasContext
*s
, uint32_t insn
)
6974 int is_q
= extract32(insn
, 30, 1);
6975 int op2
= extract32(insn
, 22, 2);
6976 int imm4
= extract32(insn
, 11, 4);
6977 int rm
= extract32(insn
, 16, 5);
6978 int rn
= extract32(insn
, 5, 5);
6979 int rd
= extract32(insn
, 0, 5);
6980 int pos
= imm4
<< 3;
6981 TCGv_i64 tcg_resl
, tcg_resh
;
6983 if (op2
!= 0 || (!is_q
&& extract32(imm4
, 3, 1))) {
6984 unallocated_encoding(s
);
6988 if (!fp_access_check(s
)) {
6992 tcg_resh
= tcg_temp_new_i64();
6993 tcg_resl
= tcg_temp_new_i64();
6995 /* Vd gets bits starting at pos bits into Vm:Vn. This is
6996 * either extracting 128 bits from a 128:128 concatenation, or
6997 * extracting 64 bits from a 64:64 concatenation.
7000 read_vec_element(s
, tcg_resl
, rn
, 0, MO_64
);
7002 read_vec_element(s
, tcg_resh
, rm
, 0, MO_64
);
7003 do_ext64(s
, tcg_resh
, tcg_resl
, pos
);
7011 EltPosns eltposns
[] = { {rn
, 0}, {rn
, 1}, {rm
, 0}, {rm
, 1} };
7012 EltPosns
*elt
= eltposns
;
7019 read_vec_element(s
, tcg_resl
, elt
->reg
, elt
->elt
, MO_64
);
7021 read_vec_element(s
, tcg_resh
, elt
->reg
, elt
->elt
, MO_64
);
7024 do_ext64(s
, tcg_resh
, tcg_resl
, pos
);
7025 tcg_hh
= tcg_temp_new_i64();
7026 read_vec_element(s
, tcg_hh
, elt
->reg
, elt
->elt
, MO_64
);
7027 do_ext64(s
, tcg_hh
, tcg_resh
, pos
);
7031 write_vec_element(s
, tcg_resl
, rd
, 0, MO_64
);
7033 write_vec_element(s
, tcg_resh
, rd
, 1, MO_64
);
7035 clear_vec_high(s
, is_q
, rd
);
7039 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
7040 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7041 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
7042 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7044 static void disas_simd_tb(DisasContext
*s
, uint32_t insn
)
7046 int op2
= extract32(insn
, 22, 2);
7047 int is_q
= extract32(insn
, 30, 1);
7048 int rm
= extract32(insn
, 16, 5);
7049 int rn
= extract32(insn
, 5, 5);
7050 int rd
= extract32(insn
, 0, 5);
7051 int is_tbx
= extract32(insn
, 12, 1);
7052 int len
= (extract32(insn
, 13, 2) + 1) * 16;
7055 unallocated_encoding(s
);
7059 if (!fp_access_check(s
)) {
7063 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s
, rd
),
7064 vec_full_reg_offset(s
, rm
), cpu_env
,
7065 is_q
? 16 : 8, vec_full_reg_size(s
),
7066 (len
<< 6) | (is_tbx
<< 5) | rn
,
7067 gen_helper_simd_tblx
);
7071 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
7072 * +---+---+-------------+------+---+------+---+------------------+------+
7073 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
7074 * +---+---+-------------+------+---+------+---+------------------+------+
7076 static void disas_simd_zip_trn(DisasContext
*s
, uint32_t insn
)
7078 int rd
= extract32(insn
, 0, 5);
7079 int rn
= extract32(insn
, 5, 5);
7080 int rm
= extract32(insn
, 16, 5);
7081 int size
= extract32(insn
, 22, 2);
7082 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7083 * bit 2 indicates 1 vs 2 variant of the insn.
7085 int opcode
= extract32(insn
, 12, 2);
7086 bool part
= extract32(insn
, 14, 1);
7087 bool is_q
= extract32(insn
, 30, 1);
7088 int esize
= 8 << size
;
7090 int datasize
= is_q
? 128 : 64;
7091 int elements
= datasize
/ esize
;
7092 TCGv_i64 tcg_res
[2], tcg_ele
;
7094 if (opcode
== 0 || (size
== 3 && !is_q
)) {
7095 unallocated_encoding(s
);
7099 if (!fp_access_check(s
)) {
7103 tcg_res
[0] = tcg_temp_new_i64();
7104 tcg_res
[1] = is_q
? tcg_temp_new_i64() : NULL
;
7105 tcg_ele
= tcg_temp_new_i64();
7107 for (i
= 0; i
< elements
; i
++) {
7111 case 1: /* UZP1/2 */
7113 int midpoint
= elements
/ 2;
7115 read_vec_element(s
, tcg_ele
, rn
, 2 * i
+ part
, size
);
7117 read_vec_element(s
, tcg_ele
, rm
,
7118 2 * (i
- midpoint
) + part
, size
);
7122 case 2: /* TRN1/2 */
7124 read_vec_element(s
, tcg_ele
, rm
, (i
& ~1) + part
, size
);
7126 read_vec_element(s
, tcg_ele
, rn
, (i
& ~1) + part
, size
);
7129 case 3: /* ZIP1/2 */
7131 int base
= part
* elements
/ 2;
7133 read_vec_element(s
, tcg_ele
, rm
, base
+ (i
>> 1), size
);
7135 read_vec_element(s
, tcg_ele
, rn
, base
+ (i
>> 1), size
);
7140 g_assert_not_reached();
7143 w
= (i
* esize
) / 64;
7144 o
= (i
* esize
) % 64;
7146 tcg_gen_mov_i64(tcg_res
[w
], tcg_ele
);
7148 tcg_gen_shli_i64(tcg_ele
, tcg_ele
, o
);
7149 tcg_gen_or_i64(tcg_res
[w
], tcg_res
[w
], tcg_ele
);
7153 for (i
= 0; i
<= is_q
; ++i
) {
7154 write_vec_element(s
, tcg_res
[i
], rd
, i
, MO_64
);
7156 clear_vec_high(s
, is_q
, rd
);
7160 * do_reduction_op helper
7162 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7163 * important for correct NaN propagation that we do these
7164 * operations in exactly the order specified by the pseudocode.
7166 * This is a recursive function, TCG temps should be freed by the
7167 * calling function once it is done with the values.
7169 static TCGv_i32
do_reduction_op(DisasContext
*s
, int fpopcode
, int rn
,
7170 int esize
, int size
, int vmap
, TCGv_ptr fpst
)
7172 if (esize
== size
) {
7174 MemOp msize
= esize
== 16 ? MO_16
: MO_32
;
7177 /* We should have one register left here */
7178 assert(ctpop8(vmap
) == 1);
7179 element
= ctz32(vmap
);
7180 assert(element
< 8);
7182 tcg_elem
= tcg_temp_new_i32();
7183 read_vec_element_i32(s
, tcg_elem
, rn
, element
, msize
);
7186 int bits
= size
/ 2;
7187 int shift
= ctpop8(vmap
) / 2;
7188 int vmap_lo
= (vmap
>> shift
) & vmap
;
7189 int vmap_hi
= (vmap
& ~vmap_lo
);
7190 TCGv_i32 tcg_hi
, tcg_lo
, tcg_res
;
7192 tcg_hi
= do_reduction_op(s
, fpopcode
, rn
, esize
, bits
, vmap_hi
, fpst
);
7193 tcg_lo
= do_reduction_op(s
, fpopcode
, rn
, esize
, bits
, vmap_lo
, fpst
);
7194 tcg_res
= tcg_temp_new_i32();
7197 case 0x0c: /* fmaxnmv half-precision */
7198 gen_helper_advsimd_maxnumh(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7200 case 0x0f: /* fmaxv half-precision */
7201 gen_helper_advsimd_maxh(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7203 case 0x1c: /* fminnmv half-precision */
7204 gen_helper_advsimd_minnumh(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7206 case 0x1f: /* fminv half-precision */
7207 gen_helper_advsimd_minh(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7209 case 0x2c: /* fmaxnmv */
7210 gen_helper_vfp_maxnums(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7212 case 0x2f: /* fmaxv */
7213 gen_helper_vfp_maxs(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7215 case 0x3c: /* fminnmv */
7216 gen_helper_vfp_minnums(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7218 case 0x3f: /* fminv */
7219 gen_helper_vfp_mins(tcg_res
, tcg_lo
, tcg_hi
, fpst
);
7222 g_assert_not_reached();
7228 /* AdvSIMD across lanes
7229 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7230 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7231 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7232 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7234 static void disas_simd_across_lanes(DisasContext
*s
, uint32_t insn
)
7236 int rd
= extract32(insn
, 0, 5);
7237 int rn
= extract32(insn
, 5, 5);
7238 int size
= extract32(insn
, 22, 2);
7239 int opcode
= extract32(insn
, 12, 5);
7240 bool is_q
= extract32(insn
, 30, 1);
7241 bool is_u
= extract32(insn
, 29, 1);
7243 bool is_min
= false;
7247 TCGv_i64 tcg_res
, tcg_elt
;
7250 case 0x1b: /* ADDV */
7252 unallocated_encoding(s
);
7256 case 0x3: /* SADDLV, UADDLV */
7257 case 0xa: /* SMAXV, UMAXV */
7258 case 0x1a: /* SMINV, UMINV */
7259 if (size
== 3 || (size
== 2 && !is_q
)) {
7260 unallocated_encoding(s
);
7264 case 0xc: /* FMAXNMV, FMINNMV */
7265 case 0xf: /* FMAXV, FMINV */
7266 /* Bit 1 of size field encodes min vs max and the actual size
7267 * depends on the encoding of the U bit. If not set (and FP16
7268 * enabled) then we do half-precision float instead of single
7271 is_min
= extract32(size
, 1, 1);
7273 if (!is_u
&& dc_isar_feature(aa64_fp16
, s
)) {
7275 } else if (!is_u
|| !is_q
|| extract32(size
, 0, 1)) {
7276 unallocated_encoding(s
);
7283 unallocated_encoding(s
);
7287 if (!fp_access_check(s
)) {
7292 elements
= (is_q
? 128 : 64) / esize
;
7294 tcg_res
= tcg_temp_new_i64();
7295 tcg_elt
= tcg_temp_new_i64();
7297 /* These instructions operate across all lanes of a vector
7298 * to produce a single result. We can guarantee that a 64
7299 * bit intermediate is sufficient:
7300 * + for [US]ADDLV the maximum element size is 32 bits, and
7301 * the result type is 64 bits
7302 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7303 * same as the element size, which is 32 bits at most
7304 * For the integer operations we can choose to work at 64
7305 * or 32 bits and truncate at the end; for simplicity
7306 * we use 64 bits always. The floating point
7307 * ops do require 32 bit intermediates, though.
7310 read_vec_element(s
, tcg_res
, rn
, 0, size
| (is_u
? 0 : MO_SIGN
));
7312 for (i
= 1; i
< elements
; i
++) {
7313 read_vec_element(s
, tcg_elt
, rn
, i
, size
| (is_u
? 0 : MO_SIGN
));
7316 case 0x03: /* SADDLV / UADDLV */
7317 case 0x1b: /* ADDV */
7318 tcg_gen_add_i64(tcg_res
, tcg_res
, tcg_elt
);
7320 case 0x0a: /* SMAXV / UMAXV */
7322 tcg_gen_umax_i64(tcg_res
, tcg_res
, tcg_elt
);
7324 tcg_gen_smax_i64(tcg_res
, tcg_res
, tcg_elt
);
7327 case 0x1a: /* SMINV / UMINV */
7329 tcg_gen_umin_i64(tcg_res
, tcg_res
, tcg_elt
);
7331 tcg_gen_smin_i64(tcg_res
, tcg_res
, tcg_elt
);
7335 g_assert_not_reached();
7340 /* Floating point vector reduction ops which work across 32
7341 * bit (single) or 16 bit (half-precision) intermediates.
7342 * Note that correct NaN propagation requires that we do these
7343 * operations in exactly the order specified by the pseudocode.
7345 TCGv_ptr fpst
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
7346 int fpopcode
= opcode
| is_min
<< 4 | is_u
<< 5;
7347 int vmap
= (1 << elements
) - 1;
7348 TCGv_i32 tcg_res32
= do_reduction_op(s
, fpopcode
, rn
, esize
,
7349 (is_q
? 128 : 64), vmap
, fpst
);
7350 tcg_gen_extu_i32_i64(tcg_res
, tcg_res32
);
7353 /* Now truncate the result to the width required for the final output */
7354 if (opcode
== 0x03) {
7355 /* SADDLV, UADDLV: result is 2*esize */
7361 tcg_gen_ext8u_i64(tcg_res
, tcg_res
);
7364 tcg_gen_ext16u_i64(tcg_res
, tcg_res
);
7367 tcg_gen_ext32u_i64(tcg_res
, tcg_res
);
7372 g_assert_not_reached();
7375 write_fp_dreg(s
, rd
, tcg_res
);
7378 /* DUP (Element, Vector)
7380 * 31 30 29 21 20 16 15 10 9 5 4 0
7381 * +---+---+-------------------+--------+-------------+------+------+
7382 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7383 * +---+---+-------------------+--------+-------------+------+------+
7385 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7387 static void handle_simd_dupe(DisasContext
*s
, int is_q
, int rd
, int rn
,
7390 int size
= ctz32(imm5
);
7393 if (size
> 3 || (size
== 3 && !is_q
)) {
7394 unallocated_encoding(s
);
7398 if (!fp_access_check(s
)) {
7402 index
= imm5
>> (size
+ 1);
7403 tcg_gen_gvec_dup_mem(size
, vec_full_reg_offset(s
, rd
),
7404 vec_reg_offset(s
, rn
, index
, size
),
7405 is_q
? 16 : 8, vec_full_reg_size(s
));
7408 /* DUP (element, scalar)
7409 * 31 21 20 16 15 10 9 5 4 0
7410 * +-----------------------+--------+-------------+------+------+
7411 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
7412 * +-----------------------+--------+-------------+------+------+
7414 static void handle_simd_dupes(DisasContext
*s
, int rd
, int rn
,
7417 int size
= ctz32(imm5
);
7422 unallocated_encoding(s
);
7426 if (!fp_access_check(s
)) {
7430 index
= imm5
>> (size
+ 1);
7432 /* This instruction just extracts the specified element and
7433 * zero-extends it into the bottom of the destination register.
7435 tmp
= tcg_temp_new_i64();
7436 read_vec_element(s
, tmp
, rn
, index
, size
);
7437 write_fp_dreg(s
, rd
, tmp
);
7442 * 31 30 29 21 20 16 15 10 9 5 4 0
7443 * +---+---+-------------------+--------+-------------+------+------+
7444 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
7445 * +---+---+-------------------+--------+-------------+------+------+
7447 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7449 static void handle_simd_dupg(DisasContext
*s
, int is_q
, int rd
, int rn
,
7452 int size
= ctz32(imm5
);
7453 uint32_t dofs
, oprsz
, maxsz
;
7455 if (size
> 3 || ((size
== 3) && !is_q
)) {
7456 unallocated_encoding(s
);
7460 if (!fp_access_check(s
)) {
7464 dofs
= vec_full_reg_offset(s
, rd
);
7465 oprsz
= is_q
? 16 : 8;
7466 maxsz
= vec_full_reg_size(s
);
7468 tcg_gen_gvec_dup_i64(size
, dofs
, oprsz
, maxsz
, cpu_reg(s
, rn
));
7473 * 31 21 20 16 15 14 11 10 9 5 4 0
7474 * +-----------------------+--------+------------+---+------+------+
7475 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7476 * +-----------------------+--------+------------+---+------+------+
7478 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7479 * index: encoded in imm5<4:size+1>
7481 static void handle_simd_inse(DisasContext
*s
, int rd
, int rn
,
7484 int size
= ctz32(imm5
);
7485 int src_index
, dst_index
;
7489 unallocated_encoding(s
);
7493 if (!fp_access_check(s
)) {
7497 dst_index
= extract32(imm5
, 1+size
, 5);
7498 src_index
= extract32(imm4
, size
, 4);
7500 tmp
= tcg_temp_new_i64();
7502 read_vec_element(s
, tmp
, rn
, src_index
, size
);
7503 write_vec_element(s
, tmp
, rd
, dst_index
, size
);
7505 /* INS is considered a 128-bit write for SVE. */
7506 clear_vec_high(s
, true, rd
);
7512 * 31 21 20 16 15 10 9 5 4 0
7513 * +-----------------------+--------+-------------+------+------+
7514 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
7515 * +-----------------------+--------+-------------+------+------+
7517 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7518 * index: encoded in imm5<4:size+1>
7520 static void handle_simd_insg(DisasContext
*s
, int rd
, int rn
, int imm5
)
7522 int size
= ctz32(imm5
);
7526 unallocated_encoding(s
);
7530 if (!fp_access_check(s
)) {
7534 idx
= extract32(imm5
, 1 + size
, 4 - size
);
7535 write_vec_element(s
, cpu_reg(s
, rn
), rd
, idx
, size
);
7537 /* INS is considered a 128-bit write for SVE. */
7538 clear_vec_high(s
, true, rd
);
7545 * 31 30 29 21 20 16 15 12 10 9 5 4 0
7546 * +---+---+-------------------+--------+-------------+------+------+
7547 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
7548 * +---+---+-------------------+--------+-------------+------+------+
7550 * U: unsigned when set
7551 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7553 static void handle_simd_umov_smov(DisasContext
*s
, int is_q
, int is_signed
,
7554 int rn
, int rd
, int imm5
)
7556 int size
= ctz32(imm5
);
7560 /* Check for UnallocatedEncodings */
7562 if (size
> 2 || (size
== 2 && !is_q
)) {
7563 unallocated_encoding(s
);
7568 || (size
< 3 && is_q
)
7569 || (size
== 3 && !is_q
)) {
7570 unallocated_encoding(s
);
7575 if (!fp_access_check(s
)) {
7579 element
= extract32(imm5
, 1+size
, 4);
7581 tcg_rd
= cpu_reg(s
, rd
);
7582 read_vec_element(s
, tcg_rd
, rn
, element
, size
| (is_signed
? MO_SIGN
: 0));
7583 if (is_signed
&& !is_q
) {
7584 tcg_gen_ext32u_i64(tcg_rd
, tcg_rd
);
7589 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7590 * +---+---+----+-----------------+------+---+------+---+------+------+
7591 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7592 * +---+---+----+-----------------+------+---+------+---+------+------+
7594 static void disas_simd_copy(DisasContext
*s
, uint32_t insn
)
7596 int rd
= extract32(insn
, 0, 5);
7597 int rn
= extract32(insn
, 5, 5);
7598 int imm4
= extract32(insn
, 11, 4);
7599 int op
= extract32(insn
, 29, 1);
7600 int is_q
= extract32(insn
, 30, 1);
7601 int imm5
= extract32(insn
, 16, 5);
7606 handle_simd_inse(s
, rd
, rn
, imm4
, imm5
);
7608 unallocated_encoding(s
);
7613 /* DUP (element - vector) */
7614 handle_simd_dupe(s
, is_q
, rd
, rn
, imm5
);
7618 handle_simd_dupg(s
, is_q
, rd
, rn
, imm5
);
7623 handle_simd_insg(s
, rd
, rn
, imm5
);
7625 unallocated_encoding(s
);
7630 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7631 handle_simd_umov_smov(s
, is_q
, (imm4
== 5), rn
, rd
, imm5
);
7634 unallocated_encoding(s
);
7640 /* AdvSIMD modified immediate
7641 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
7642 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7643 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
7644 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7646 * There are a number of operations that can be carried out here:
7647 * MOVI - move (shifted) imm into register
7648 * MVNI - move inverted (shifted) imm into register
7649 * ORR - bitwise OR of (shifted) imm with register
7650 * BIC - bitwise clear of (shifted) imm with register
7651 * With ARMv8.2 we also have:
7652 * FMOV half-precision
7654 static void disas_simd_mod_imm(DisasContext
*s
, uint32_t insn
)
7656 int rd
= extract32(insn
, 0, 5);
7657 int cmode
= extract32(insn
, 12, 4);
7658 int o2
= extract32(insn
, 11, 1);
7659 uint64_t abcdefgh
= extract32(insn
, 5, 5) | (extract32(insn
, 16, 3) << 5);
7660 bool is_neg
= extract32(insn
, 29, 1);
7661 bool is_q
= extract32(insn
, 30, 1);
7664 if (o2
!= 0 || ((cmode
== 0xf) && is_neg
&& !is_q
)) {
7665 /* Check for FMOV (vector, immediate) - half-precision */
7666 if (!(dc_isar_feature(aa64_fp16
, s
) && o2
&& cmode
== 0xf)) {
7667 unallocated_encoding(s
);
7672 if (!fp_access_check(s
)) {
7676 if (cmode
== 15 && o2
&& !is_neg
) {
7677 /* FMOV (vector, immediate) - half-precision */
7678 imm
= vfp_expand_imm(MO_16
, abcdefgh
);
7679 /* now duplicate across the lanes */
7680 imm
= dup_const(MO_16
, imm
);
7682 imm
= asimd_imm_const(abcdefgh
, cmode
, is_neg
);
7685 if (!((cmode
& 0x9) == 0x1 || (cmode
& 0xd) == 0x9)) {
7686 /* MOVI or MVNI, with MVNI negation handled above. */
7687 tcg_gen_gvec_dup_imm(MO_64
, vec_full_reg_offset(s
, rd
), is_q
? 16 : 8,
7688 vec_full_reg_size(s
), imm
);
7690 /* ORR or BIC, with BIC negation to AND handled above. */
7692 gen_gvec_fn2i(s
, is_q
, rd
, rd
, imm
, tcg_gen_gvec_andi
, MO_64
);
7694 gen_gvec_fn2i(s
, is_q
, rd
, rd
, imm
, tcg_gen_gvec_ori
, MO_64
);
7699 /* AdvSIMD scalar copy
7700 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
7701 * +-----+----+-----------------+------+---+------+---+------+------+
7702 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
7703 * +-----+----+-----------------+------+---+------+---+------+------+
7705 static void disas_simd_scalar_copy(DisasContext
*s
, uint32_t insn
)
7707 int rd
= extract32(insn
, 0, 5);
7708 int rn
= extract32(insn
, 5, 5);
7709 int imm4
= extract32(insn
, 11, 4);
7710 int imm5
= extract32(insn
, 16, 5);
7711 int op
= extract32(insn
, 29, 1);
7713 if (op
!= 0 || imm4
!= 0) {
7714 unallocated_encoding(s
);
7718 /* DUP (element, scalar) */
7719 handle_simd_dupes(s
, rd
, rn
, imm5
);
7722 /* AdvSIMD scalar pairwise
7723 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7724 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7725 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
7726 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7728 static void disas_simd_scalar_pairwise(DisasContext
*s
, uint32_t insn
)
7730 int u
= extract32(insn
, 29, 1);
7731 int size
= extract32(insn
, 22, 2);
7732 int opcode
= extract32(insn
, 12, 5);
7733 int rn
= extract32(insn
, 5, 5);
7734 int rd
= extract32(insn
, 0, 5);
7737 /* For some ops (the FP ones), size[1] is part of the encoding.
7738 * For ADDP strictly it is not but size[1] is always 1 for valid
7741 opcode
|= (extract32(size
, 1, 1) << 5);
7744 case 0x3b: /* ADDP */
7745 if (u
|| size
!= 3) {
7746 unallocated_encoding(s
);
7749 if (!fp_access_check(s
)) {
7755 case 0xc: /* FMAXNMP */
7756 case 0xd: /* FADDP */
7757 case 0xf: /* FMAXP */
7758 case 0x2c: /* FMINNMP */
7759 case 0x2f: /* FMINP */
7760 /* FP op, size[0] is 32 or 64 bit*/
7762 if (!dc_isar_feature(aa64_fp16
, s
)) {
7763 unallocated_encoding(s
);
7769 size
= extract32(size
, 0, 1) ? MO_64
: MO_32
;
7772 if (!fp_access_check(s
)) {
7776 fpst
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
7779 unallocated_encoding(s
);
7783 if (size
== MO_64
) {
7784 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
7785 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
7786 TCGv_i64 tcg_res
= tcg_temp_new_i64();
7788 read_vec_element(s
, tcg_op1
, rn
, 0, MO_64
);
7789 read_vec_element(s
, tcg_op2
, rn
, 1, MO_64
);
7792 case 0x3b: /* ADDP */
7793 tcg_gen_add_i64(tcg_res
, tcg_op1
, tcg_op2
);
7795 case 0xc: /* FMAXNMP */
7796 gen_helper_vfp_maxnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7798 case 0xd: /* FADDP */
7799 gen_helper_vfp_addd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7801 case 0xf: /* FMAXP */
7802 gen_helper_vfp_maxd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7804 case 0x2c: /* FMINNMP */
7805 gen_helper_vfp_minnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7807 case 0x2f: /* FMINP */
7808 gen_helper_vfp_mind(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7811 g_assert_not_reached();
7814 write_fp_dreg(s
, rd
, tcg_res
);
7816 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
7817 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
7818 TCGv_i32 tcg_res
= tcg_temp_new_i32();
7820 read_vec_element_i32(s
, tcg_op1
, rn
, 0, size
);
7821 read_vec_element_i32(s
, tcg_op2
, rn
, 1, size
);
7823 if (size
== MO_16
) {
7825 case 0xc: /* FMAXNMP */
7826 gen_helper_advsimd_maxnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7828 case 0xd: /* FADDP */
7829 gen_helper_advsimd_addh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7831 case 0xf: /* FMAXP */
7832 gen_helper_advsimd_maxh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7834 case 0x2c: /* FMINNMP */
7835 gen_helper_advsimd_minnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7837 case 0x2f: /* FMINP */
7838 gen_helper_advsimd_minh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7841 g_assert_not_reached();
7845 case 0xc: /* FMAXNMP */
7846 gen_helper_vfp_maxnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7848 case 0xd: /* FADDP */
7849 gen_helper_vfp_adds(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7851 case 0xf: /* FMAXP */
7852 gen_helper_vfp_maxs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7854 case 0x2c: /* FMINNMP */
7855 gen_helper_vfp_minnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7857 case 0x2f: /* FMINP */
7858 gen_helper_vfp_mins(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
7861 g_assert_not_reached();
7865 write_fp_sreg(s
, rd
, tcg_res
);
7870 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7872 * This code is handles the common shifting code and is used by both
7873 * the vector and scalar code.
7875 static void handle_shri_with_rndacc(TCGv_i64 tcg_res
, TCGv_i64 tcg_src
,
7876 TCGv_i64 tcg_rnd
, bool accumulate
,
7877 bool is_u
, int size
, int shift
)
7879 bool extended_result
= false;
7880 bool round
= tcg_rnd
!= NULL
;
7882 TCGv_i64 tcg_src_hi
;
7884 if (round
&& size
== 3) {
7885 extended_result
= true;
7886 ext_lshift
= 64 - shift
;
7887 tcg_src_hi
= tcg_temp_new_i64();
7888 } else if (shift
== 64) {
7889 if (!accumulate
&& is_u
) {
7890 /* result is zero */
7891 tcg_gen_movi_i64(tcg_res
, 0);
7896 /* Deal with the rounding step */
7898 if (extended_result
) {
7899 TCGv_i64 tcg_zero
= tcg_constant_i64(0);
7901 /* take care of sign extending tcg_res */
7902 tcg_gen_sari_i64(tcg_src_hi
, tcg_src
, 63);
7903 tcg_gen_add2_i64(tcg_src
, tcg_src_hi
,
7904 tcg_src
, tcg_src_hi
,
7907 tcg_gen_add2_i64(tcg_src
, tcg_src_hi
,
7912 tcg_gen_add_i64(tcg_src
, tcg_src
, tcg_rnd
);
7916 /* Now do the shift right */
7917 if (round
&& extended_result
) {
7918 /* extended case, >64 bit precision required */
7919 if (ext_lshift
== 0) {
7920 /* special case, only high bits matter */
7921 tcg_gen_mov_i64(tcg_src
, tcg_src_hi
);
7923 tcg_gen_shri_i64(tcg_src
, tcg_src
, shift
);
7924 tcg_gen_shli_i64(tcg_src_hi
, tcg_src_hi
, ext_lshift
);
7925 tcg_gen_or_i64(tcg_src
, tcg_src
, tcg_src_hi
);
7930 /* essentially shifting in 64 zeros */
7931 tcg_gen_movi_i64(tcg_src
, 0);
7933 tcg_gen_shri_i64(tcg_src
, tcg_src
, shift
);
7937 /* effectively extending the sign-bit */
7938 tcg_gen_sari_i64(tcg_src
, tcg_src
, 63);
7940 tcg_gen_sari_i64(tcg_src
, tcg_src
, shift
);
7946 tcg_gen_add_i64(tcg_res
, tcg_res
, tcg_src
);
7948 tcg_gen_mov_i64(tcg_res
, tcg_src
);
7952 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7953 static void handle_scalar_simd_shri(DisasContext
*s
,
7954 bool is_u
, int immh
, int immb
,
7955 int opcode
, int rn
, int rd
)
7958 int immhb
= immh
<< 3 | immb
;
7959 int shift
= 2 * (8 << size
) - immhb
;
7960 bool accumulate
= false;
7962 bool insert
= false;
7967 if (!extract32(immh
, 3, 1)) {
7968 unallocated_encoding(s
);
7972 if (!fp_access_check(s
)) {
7977 case 0x02: /* SSRA / USRA (accumulate) */
7980 case 0x04: /* SRSHR / URSHR (rounding) */
7983 case 0x06: /* SRSRA / URSRA (accum + rounding) */
7984 accumulate
= round
= true;
7986 case 0x08: /* SRI */
7992 tcg_round
= tcg_constant_i64(1ULL << (shift
- 1));
7997 tcg_rn
= read_fp_dreg(s
, rn
);
7998 tcg_rd
= (accumulate
|| insert
) ? read_fp_dreg(s
, rd
) : tcg_temp_new_i64();
8001 /* shift count same as element size is valid but does nothing;
8002 * special case to avoid potential shift by 64.
8004 int esize
= 8 << size
;
8005 if (shift
!= esize
) {
8006 tcg_gen_shri_i64(tcg_rn
, tcg_rn
, shift
);
8007 tcg_gen_deposit_i64(tcg_rd
, tcg_rd
, tcg_rn
, 0, esize
- shift
);
8010 handle_shri_with_rndacc(tcg_rd
, tcg_rn
, tcg_round
,
8011 accumulate
, is_u
, size
, shift
);
8014 write_fp_dreg(s
, rd
, tcg_rd
);
8017 /* SHL/SLI - Scalar shift left */
8018 static void handle_scalar_simd_shli(DisasContext
*s
, bool insert
,
8019 int immh
, int immb
, int opcode
,
8022 int size
= 32 - clz32(immh
) - 1;
8023 int immhb
= immh
<< 3 | immb
;
8024 int shift
= immhb
- (8 << size
);
8028 if (!extract32(immh
, 3, 1)) {
8029 unallocated_encoding(s
);
8033 if (!fp_access_check(s
)) {
8037 tcg_rn
= read_fp_dreg(s
, rn
);
8038 tcg_rd
= insert
? read_fp_dreg(s
, rd
) : tcg_temp_new_i64();
8041 tcg_gen_deposit_i64(tcg_rd
, tcg_rd
, tcg_rn
, shift
, 64 - shift
);
8043 tcg_gen_shli_i64(tcg_rd
, tcg_rn
, shift
);
8046 write_fp_dreg(s
, rd
, tcg_rd
);
8049 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8050 * (signed/unsigned) narrowing */
8051 static void handle_vec_simd_sqshrn(DisasContext
*s
, bool is_scalar
, bool is_q
,
8052 bool is_u_shift
, bool is_u_narrow
,
8053 int immh
, int immb
, int opcode
,
8056 int immhb
= immh
<< 3 | immb
;
8057 int size
= 32 - clz32(immh
) - 1;
8058 int esize
= 8 << size
;
8059 int shift
= (2 * esize
) - immhb
;
8060 int elements
= is_scalar
? 1 : (64 / esize
);
8061 bool round
= extract32(opcode
, 0, 1);
8062 MemOp ldop
= (size
+ 1) | (is_u_shift
? 0 : MO_SIGN
);
8063 TCGv_i64 tcg_rn
, tcg_rd
, tcg_round
;
8064 TCGv_i32 tcg_rd_narrowed
;
8067 static NeonGenNarrowEnvFn
* const signed_narrow_fns
[4][2] = {
8068 { gen_helper_neon_narrow_sat_s8
,
8069 gen_helper_neon_unarrow_sat8
},
8070 { gen_helper_neon_narrow_sat_s16
,
8071 gen_helper_neon_unarrow_sat16
},
8072 { gen_helper_neon_narrow_sat_s32
,
8073 gen_helper_neon_unarrow_sat32
},
8076 static NeonGenNarrowEnvFn
* const unsigned_narrow_fns
[4] = {
8077 gen_helper_neon_narrow_sat_u8
,
8078 gen_helper_neon_narrow_sat_u16
,
8079 gen_helper_neon_narrow_sat_u32
,
8082 NeonGenNarrowEnvFn
*narrowfn
;
8088 if (extract32(immh
, 3, 1)) {
8089 unallocated_encoding(s
);
8093 if (!fp_access_check(s
)) {
8098 narrowfn
= unsigned_narrow_fns
[size
];
8100 narrowfn
= signed_narrow_fns
[size
][is_u_narrow
? 1 : 0];
8103 tcg_rn
= tcg_temp_new_i64();
8104 tcg_rd
= tcg_temp_new_i64();
8105 tcg_rd_narrowed
= tcg_temp_new_i32();
8106 tcg_final
= tcg_temp_new_i64();
8109 tcg_round
= tcg_constant_i64(1ULL << (shift
- 1));
8114 for (i
= 0; i
< elements
; i
++) {
8115 read_vec_element(s
, tcg_rn
, rn
, i
, ldop
);
8116 handle_shri_with_rndacc(tcg_rd
, tcg_rn
, tcg_round
,
8117 false, is_u_shift
, size
+1, shift
);
8118 narrowfn(tcg_rd_narrowed
, cpu_env
, tcg_rd
);
8119 tcg_gen_extu_i32_i64(tcg_rd
, tcg_rd_narrowed
);
8121 tcg_gen_mov_i64(tcg_final
, tcg_rd
);
8123 tcg_gen_deposit_i64(tcg_final
, tcg_final
, tcg_rd
, esize
* i
, esize
);
8128 write_vec_element(s
, tcg_final
, rd
, 0, MO_64
);
8130 write_vec_element(s
, tcg_final
, rd
, 1, MO_64
);
8132 clear_vec_high(s
, is_q
, rd
);
8135 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8136 static void handle_simd_qshl(DisasContext
*s
, bool scalar
, bool is_q
,
8137 bool src_unsigned
, bool dst_unsigned
,
8138 int immh
, int immb
, int rn
, int rd
)
8140 int immhb
= immh
<< 3 | immb
;
8141 int size
= 32 - clz32(immh
) - 1;
8142 int shift
= immhb
- (8 << size
);
8146 assert(!(scalar
&& is_q
));
8149 if (!is_q
&& extract32(immh
, 3, 1)) {
8150 unallocated_encoding(s
);
8154 /* Since we use the variable-shift helpers we must
8155 * replicate the shift count into each element of
8156 * the tcg_shift value.
8160 shift
|= shift
<< 8;
8163 shift
|= shift
<< 16;
8169 g_assert_not_reached();
8173 if (!fp_access_check(s
)) {
8178 TCGv_i64 tcg_shift
= tcg_constant_i64(shift
);
8179 static NeonGenTwo64OpEnvFn
* const fns
[2][2] = {
8180 { gen_helper_neon_qshl_s64
, gen_helper_neon_qshlu_s64
},
8181 { NULL
, gen_helper_neon_qshl_u64
},
8183 NeonGenTwo64OpEnvFn
*genfn
= fns
[src_unsigned
][dst_unsigned
];
8184 int maxpass
= is_q
? 2 : 1;
8186 for (pass
= 0; pass
< maxpass
; pass
++) {
8187 TCGv_i64 tcg_op
= tcg_temp_new_i64();
8189 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
8190 genfn(tcg_op
, cpu_env
, tcg_op
, tcg_shift
);
8191 write_vec_element(s
, tcg_op
, rd
, pass
, MO_64
);
8193 clear_vec_high(s
, is_q
, rd
);
8195 TCGv_i32 tcg_shift
= tcg_constant_i32(shift
);
8196 static NeonGenTwoOpEnvFn
* const fns
[2][2][3] = {
8198 { gen_helper_neon_qshl_s8
,
8199 gen_helper_neon_qshl_s16
,
8200 gen_helper_neon_qshl_s32
},
8201 { gen_helper_neon_qshlu_s8
,
8202 gen_helper_neon_qshlu_s16
,
8203 gen_helper_neon_qshlu_s32
}
8205 { NULL
, NULL
, NULL
},
8206 { gen_helper_neon_qshl_u8
,
8207 gen_helper_neon_qshl_u16
,
8208 gen_helper_neon_qshl_u32
}
8211 NeonGenTwoOpEnvFn
*genfn
= fns
[src_unsigned
][dst_unsigned
][size
];
8212 MemOp memop
= scalar
? size
: MO_32
;
8213 int maxpass
= scalar
? 1 : is_q
? 4 : 2;
8215 for (pass
= 0; pass
< maxpass
; pass
++) {
8216 TCGv_i32 tcg_op
= tcg_temp_new_i32();
8218 read_vec_element_i32(s
, tcg_op
, rn
, pass
, memop
);
8219 genfn(tcg_op
, cpu_env
, tcg_op
, tcg_shift
);
8223 tcg_gen_ext8u_i32(tcg_op
, tcg_op
);
8226 tcg_gen_ext16u_i32(tcg_op
, tcg_op
);
8231 g_assert_not_reached();
8233 write_fp_sreg(s
, rd
, tcg_op
);
8235 write_vec_element_i32(s
, tcg_op
, rd
, pass
, MO_32
);
8240 clear_vec_high(s
, is_q
, rd
);
8245 /* Common vector code for handling integer to FP conversion */
8246 static void handle_simd_intfp_conv(DisasContext
*s
, int rd
, int rn
,
8247 int elements
, int is_signed
,
8248 int fracbits
, int size
)
8250 TCGv_ptr tcg_fpst
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
8251 TCGv_i32 tcg_shift
= NULL
;
8253 MemOp mop
= size
| (is_signed
? MO_SIGN
: 0);
8256 if (fracbits
|| size
== MO_64
) {
8257 tcg_shift
= tcg_constant_i32(fracbits
);
8260 if (size
== MO_64
) {
8261 TCGv_i64 tcg_int64
= tcg_temp_new_i64();
8262 TCGv_i64 tcg_double
= tcg_temp_new_i64();
8264 for (pass
= 0; pass
< elements
; pass
++) {
8265 read_vec_element(s
, tcg_int64
, rn
, pass
, mop
);
8268 gen_helper_vfp_sqtod(tcg_double
, tcg_int64
,
8269 tcg_shift
, tcg_fpst
);
8271 gen_helper_vfp_uqtod(tcg_double
, tcg_int64
,
8272 tcg_shift
, tcg_fpst
);
8274 if (elements
== 1) {
8275 write_fp_dreg(s
, rd
, tcg_double
);
8277 write_vec_element(s
, tcg_double
, rd
, pass
, MO_64
);
8281 TCGv_i32 tcg_int32
= tcg_temp_new_i32();
8282 TCGv_i32 tcg_float
= tcg_temp_new_i32();
8284 for (pass
= 0; pass
< elements
; pass
++) {
8285 read_vec_element_i32(s
, tcg_int32
, rn
, pass
, mop
);
8291 gen_helper_vfp_sltos(tcg_float
, tcg_int32
,
8292 tcg_shift
, tcg_fpst
);
8294 gen_helper_vfp_ultos(tcg_float
, tcg_int32
,
8295 tcg_shift
, tcg_fpst
);
8299 gen_helper_vfp_sitos(tcg_float
, tcg_int32
, tcg_fpst
);
8301 gen_helper_vfp_uitos(tcg_float
, tcg_int32
, tcg_fpst
);
8308 gen_helper_vfp_sltoh(tcg_float
, tcg_int32
,
8309 tcg_shift
, tcg_fpst
);
8311 gen_helper_vfp_ultoh(tcg_float
, tcg_int32
,
8312 tcg_shift
, tcg_fpst
);
8316 gen_helper_vfp_sitoh(tcg_float
, tcg_int32
, tcg_fpst
);
8318 gen_helper_vfp_uitoh(tcg_float
, tcg_int32
, tcg_fpst
);
8323 g_assert_not_reached();
8326 if (elements
== 1) {
8327 write_fp_sreg(s
, rd
, tcg_float
);
8329 write_vec_element_i32(s
, tcg_float
, rd
, pass
, size
);
8334 clear_vec_high(s
, elements
<< size
== 16, rd
);
8337 /* UCVTF/SCVTF - Integer to FP conversion */
8338 static void handle_simd_shift_intfp_conv(DisasContext
*s
, bool is_scalar
,
8339 bool is_q
, bool is_u
,
8340 int immh
, int immb
, int opcode
,
8343 int size
, elements
, fracbits
;
8344 int immhb
= immh
<< 3 | immb
;
8348 if (!is_scalar
&& !is_q
) {
8349 unallocated_encoding(s
);
8352 } else if (immh
& 4) {
8354 } else if (immh
& 2) {
8356 if (!dc_isar_feature(aa64_fp16
, s
)) {
8357 unallocated_encoding(s
);
8361 /* immh == 0 would be a failure of the decode logic */
8362 g_assert(immh
== 1);
8363 unallocated_encoding(s
);
8370 elements
= (8 << is_q
) >> size
;
8372 fracbits
= (16 << size
) - immhb
;
8374 if (!fp_access_check(s
)) {
8378 handle_simd_intfp_conv(s
, rd
, rn
, elements
, !is_u
, fracbits
, size
);
8381 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8382 static void handle_simd_shift_fpint_conv(DisasContext
*s
, bool is_scalar
,
8383 bool is_q
, bool is_u
,
8384 int immh
, int immb
, int rn
, int rd
)
8386 int immhb
= immh
<< 3 | immb
;
8387 int pass
, size
, fracbits
;
8388 TCGv_ptr tcg_fpstatus
;
8389 TCGv_i32 tcg_rmode
, tcg_shift
;
8393 if (!is_scalar
&& !is_q
) {
8394 unallocated_encoding(s
);
8397 } else if (immh
& 0x4) {
8399 } else if (immh
& 0x2) {
8401 if (!dc_isar_feature(aa64_fp16
, s
)) {
8402 unallocated_encoding(s
);
8406 /* Should have split out AdvSIMD modified immediate earlier. */
8408 unallocated_encoding(s
);
8412 if (!fp_access_check(s
)) {
8416 assert(!(is_scalar
&& is_q
));
8418 tcg_fpstatus
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
8419 tcg_rmode
= gen_set_rmode(FPROUNDING_ZERO
, tcg_fpstatus
);
8420 fracbits
= (16 << size
) - immhb
;
8421 tcg_shift
= tcg_constant_i32(fracbits
);
8423 if (size
== MO_64
) {
8424 int maxpass
= is_scalar
? 1 : 2;
8426 for (pass
= 0; pass
< maxpass
; pass
++) {
8427 TCGv_i64 tcg_op
= tcg_temp_new_i64();
8429 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
8431 gen_helper_vfp_touqd(tcg_op
, tcg_op
, tcg_shift
, tcg_fpstatus
);
8433 gen_helper_vfp_tosqd(tcg_op
, tcg_op
, tcg_shift
, tcg_fpstatus
);
8435 write_vec_element(s
, tcg_op
, rd
, pass
, MO_64
);
8437 clear_vec_high(s
, is_q
, rd
);
8439 void (*fn
)(TCGv_i32
, TCGv_i32
, TCGv_i32
, TCGv_ptr
);
8440 int maxpass
= is_scalar
? 1 : ((8 << is_q
) >> size
);
8445 fn
= gen_helper_vfp_touhh
;
8447 fn
= gen_helper_vfp_toshh
;
8452 fn
= gen_helper_vfp_touls
;
8454 fn
= gen_helper_vfp_tosls
;
8458 g_assert_not_reached();
8461 for (pass
= 0; pass
< maxpass
; pass
++) {
8462 TCGv_i32 tcg_op
= tcg_temp_new_i32();
8464 read_vec_element_i32(s
, tcg_op
, rn
, pass
, size
);
8465 fn(tcg_op
, tcg_op
, tcg_shift
, tcg_fpstatus
);
8467 write_fp_sreg(s
, rd
, tcg_op
);
8469 write_vec_element_i32(s
, tcg_op
, rd
, pass
, size
);
8473 clear_vec_high(s
, is_q
, rd
);
8477 gen_restore_rmode(tcg_rmode
, tcg_fpstatus
);
8480 /* AdvSIMD scalar shift by immediate
8481 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8482 * +-----+---+-------------+------+------+--------+---+------+------+
8483 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8484 * +-----+---+-------------+------+------+--------+---+------+------+
8486 * This is the scalar version so it works on a fixed sized registers
8488 static void disas_simd_scalar_shift_imm(DisasContext
*s
, uint32_t insn
)
8490 int rd
= extract32(insn
, 0, 5);
8491 int rn
= extract32(insn
, 5, 5);
8492 int opcode
= extract32(insn
, 11, 5);
8493 int immb
= extract32(insn
, 16, 3);
8494 int immh
= extract32(insn
, 19, 4);
8495 bool is_u
= extract32(insn
, 29, 1);
8498 unallocated_encoding(s
);
8503 case 0x08: /* SRI */
8505 unallocated_encoding(s
);
8509 case 0x00: /* SSHR / USHR */
8510 case 0x02: /* SSRA / USRA */
8511 case 0x04: /* SRSHR / URSHR */
8512 case 0x06: /* SRSRA / URSRA */
8513 handle_scalar_simd_shri(s
, is_u
, immh
, immb
, opcode
, rn
, rd
);
8515 case 0x0a: /* SHL / SLI */
8516 handle_scalar_simd_shli(s
, is_u
, immh
, immb
, opcode
, rn
, rd
);
8518 case 0x1c: /* SCVTF, UCVTF */
8519 handle_simd_shift_intfp_conv(s
, true, false, is_u
, immh
, immb
,
8522 case 0x10: /* SQSHRUN, SQSHRUN2 */
8523 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8525 unallocated_encoding(s
);
8528 handle_vec_simd_sqshrn(s
, true, false, false, true,
8529 immh
, immb
, opcode
, rn
, rd
);
8531 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8532 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8533 handle_vec_simd_sqshrn(s
, true, false, is_u
, is_u
,
8534 immh
, immb
, opcode
, rn
, rd
);
8536 case 0xc: /* SQSHLU */
8538 unallocated_encoding(s
);
8541 handle_simd_qshl(s
, true, false, false, true, immh
, immb
, rn
, rd
);
8543 case 0xe: /* SQSHL, UQSHL */
8544 handle_simd_qshl(s
, true, false, is_u
, is_u
, immh
, immb
, rn
, rd
);
8546 case 0x1f: /* FCVTZS, FCVTZU */
8547 handle_simd_shift_fpint_conv(s
, true, false, is_u
, immh
, immb
, rn
, rd
);
8550 unallocated_encoding(s
);
8555 /* AdvSIMD scalar three different
8556 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8557 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8558 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8559 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8561 static void disas_simd_scalar_three_reg_diff(DisasContext
*s
, uint32_t insn
)
8563 bool is_u
= extract32(insn
, 29, 1);
8564 int size
= extract32(insn
, 22, 2);
8565 int opcode
= extract32(insn
, 12, 4);
8566 int rm
= extract32(insn
, 16, 5);
8567 int rn
= extract32(insn
, 5, 5);
8568 int rd
= extract32(insn
, 0, 5);
8571 unallocated_encoding(s
);
8576 case 0x9: /* SQDMLAL, SQDMLAL2 */
8577 case 0xb: /* SQDMLSL, SQDMLSL2 */
8578 case 0xd: /* SQDMULL, SQDMULL2 */
8579 if (size
== 0 || size
== 3) {
8580 unallocated_encoding(s
);
8585 unallocated_encoding(s
);
8589 if (!fp_access_check(s
)) {
8594 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
8595 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
8596 TCGv_i64 tcg_res
= tcg_temp_new_i64();
8598 read_vec_element(s
, tcg_op1
, rn
, 0, MO_32
| MO_SIGN
);
8599 read_vec_element(s
, tcg_op2
, rm
, 0, MO_32
| MO_SIGN
);
8601 tcg_gen_mul_i64(tcg_res
, tcg_op1
, tcg_op2
);
8602 gen_helper_neon_addl_saturate_s64(tcg_res
, cpu_env
, tcg_res
, tcg_res
);
8605 case 0xd: /* SQDMULL, SQDMULL2 */
8607 case 0xb: /* SQDMLSL, SQDMLSL2 */
8608 tcg_gen_neg_i64(tcg_res
, tcg_res
);
8610 case 0x9: /* SQDMLAL, SQDMLAL2 */
8611 read_vec_element(s
, tcg_op1
, rd
, 0, MO_64
);
8612 gen_helper_neon_addl_saturate_s64(tcg_res
, cpu_env
,
8616 g_assert_not_reached();
8619 write_fp_dreg(s
, rd
, tcg_res
);
8621 TCGv_i32 tcg_op1
= read_fp_hreg(s
, rn
);
8622 TCGv_i32 tcg_op2
= read_fp_hreg(s
, rm
);
8623 TCGv_i64 tcg_res
= tcg_temp_new_i64();
8625 gen_helper_neon_mull_s16(tcg_res
, tcg_op1
, tcg_op2
);
8626 gen_helper_neon_addl_saturate_s32(tcg_res
, cpu_env
, tcg_res
, tcg_res
);
8629 case 0xd: /* SQDMULL, SQDMULL2 */
8631 case 0xb: /* SQDMLSL, SQDMLSL2 */
8632 gen_helper_neon_negl_u32(tcg_res
, tcg_res
);
8634 case 0x9: /* SQDMLAL, SQDMLAL2 */
8636 TCGv_i64 tcg_op3
= tcg_temp_new_i64();
8637 read_vec_element(s
, tcg_op3
, rd
, 0, MO_32
);
8638 gen_helper_neon_addl_saturate_s32(tcg_res
, cpu_env
,
8643 g_assert_not_reached();
8646 tcg_gen_ext32u_i64(tcg_res
, tcg_res
);
8647 write_fp_dreg(s
, rd
, tcg_res
);
8651 static void handle_3same_64(DisasContext
*s
, int opcode
, bool u
,
8652 TCGv_i64 tcg_rd
, TCGv_i64 tcg_rn
, TCGv_i64 tcg_rm
)
8654 /* Handle 64x64->64 opcodes which are shared between the scalar
8655 * and vector 3-same groups. We cover every opcode where size == 3
8656 * is valid in either the three-reg-same (integer, not pairwise)
8657 * or scalar-three-reg-same groups.
8662 case 0x1: /* SQADD */
8664 gen_helper_neon_qadd_u64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8666 gen_helper_neon_qadd_s64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8669 case 0x5: /* SQSUB */
8671 gen_helper_neon_qsub_u64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8673 gen_helper_neon_qsub_s64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8676 case 0x6: /* CMGT, CMHI */
8677 cond
= u
? TCG_COND_GTU
: TCG_COND_GT
;
8679 /* 64 bit integer comparison, result = test ? -1 : 0. */
8680 tcg_gen_negsetcond_i64(cond
, tcg_rd
, tcg_rn
, tcg_rm
);
8682 case 0x7: /* CMGE, CMHS */
8683 cond
= u
? TCG_COND_GEU
: TCG_COND_GE
;
8685 case 0x11: /* CMTST, CMEQ */
8690 gen_cmtst_i64(tcg_rd
, tcg_rn
, tcg_rm
);
8692 case 0x8: /* SSHL, USHL */
8694 gen_ushl_i64(tcg_rd
, tcg_rn
, tcg_rm
);
8696 gen_sshl_i64(tcg_rd
, tcg_rn
, tcg_rm
);
8699 case 0x9: /* SQSHL, UQSHL */
8701 gen_helper_neon_qshl_u64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8703 gen_helper_neon_qshl_s64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8706 case 0xa: /* SRSHL, URSHL */
8708 gen_helper_neon_rshl_u64(tcg_rd
, tcg_rn
, tcg_rm
);
8710 gen_helper_neon_rshl_s64(tcg_rd
, tcg_rn
, tcg_rm
);
8713 case 0xb: /* SQRSHL, UQRSHL */
8715 gen_helper_neon_qrshl_u64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8717 gen_helper_neon_qrshl_s64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rm
);
8720 case 0x10: /* ADD, SUB */
8722 tcg_gen_sub_i64(tcg_rd
, tcg_rn
, tcg_rm
);
8724 tcg_gen_add_i64(tcg_rd
, tcg_rn
, tcg_rm
);
8728 g_assert_not_reached();
8732 /* Handle the 3-same-operands float operations; shared by the scalar
8733 * and vector encodings. The caller must filter out any encodings
8734 * not allocated for the encoding it is dealing with.
8736 static void handle_3same_float(DisasContext
*s
, int size
, int elements
,
8737 int fpopcode
, int rd
, int rn
, int rm
)
8740 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
8742 for (pass
= 0; pass
< elements
; pass
++) {
8745 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
8746 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
8747 TCGv_i64 tcg_res
= tcg_temp_new_i64();
8749 read_vec_element(s
, tcg_op1
, rn
, pass
, MO_64
);
8750 read_vec_element(s
, tcg_op2
, rm
, pass
, MO_64
);
8753 case 0x39: /* FMLS */
8754 /* As usual for ARM, separate negation for fused multiply-add */
8755 gen_helper_vfp_negd(tcg_op1
, tcg_op1
);
8757 case 0x19: /* FMLA */
8758 read_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
8759 gen_helper_vfp_muladdd(tcg_res
, tcg_op1
, tcg_op2
,
8762 case 0x18: /* FMAXNM */
8763 gen_helper_vfp_maxnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8765 case 0x1a: /* FADD */
8766 gen_helper_vfp_addd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8768 case 0x1b: /* FMULX */
8769 gen_helper_vfp_mulxd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8771 case 0x1c: /* FCMEQ */
8772 gen_helper_neon_ceq_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8774 case 0x1e: /* FMAX */
8775 gen_helper_vfp_maxd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8777 case 0x1f: /* FRECPS */
8778 gen_helper_recpsf_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8780 case 0x38: /* FMINNM */
8781 gen_helper_vfp_minnumd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8783 case 0x3a: /* FSUB */
8784 gen_helper_vfp_subd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8786 case 0x3e: /* FMIN */
8787 gen_helper_vfp_mind(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8789 case 0x3f: /* FRSQRTS */
8790 gen_helper_rsqrtsf_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8792 case 0x5b: /* FMUL */
8793 gen_helper_vfp_muld(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8795 case 0x5c: /* FCMGE */
8796 gen_helper_neon_cge_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8798 case 0x5d: /* FACGE */
8799 gen_helper_neon_acge_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8801 case 0x5f: /* FDIV */
8802 gen_helper_vfp_divd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8804 case 0x7a: /* FABD */
8805 gen_helper_vfp_subd(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8806 gen_helper_vfp_absd(tcg_res
, tcg_res
);
8808 case 0x7c: /* FCMGT */
8809 gen_helper_neon_cgt_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8811 case 0x7d: /* FACGT */
8812 gen_helper_neon_acgt_f64(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8815 g_assert_not_reached();
8818 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
8821 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
8822 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
8823 TCGv_i32 tcg_res
= tcg_temp_new_i32();
8825 read_vec_element_i32(s
, tcg_op1
, rn
, pass
, MO_32
);
8826 read_vec_element_i32(s
, tcg_op2
, rm
, pass
, MO_32
);
8829 case 0x39: /* FMLS */
8830 /* As usual for ARM, separate negation for fused multiply-add */
8831 gen_helper_vfp_negs(tcg_op1
, tcg_op1
);
8833 case 0x19: /* FMLA */
8834 read_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
8835 gen_helper_vfp_muladds(tcg_res
, tcg_op1
, tcg_op2
,
8838 case 0x1a: /* FADD */
8839 gen_helper_vfp_adds(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8841 case 0x1b: /* FMULX */
8842 gen_helper_vfp_mulxs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8844 case 0x1c: /* FCMEQ */
8845 gen_helper_neon_ceq_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8847 case 0x1e: /* FMAX */
8848 gen_helper_vfp_maxs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8850 case 0x1f: /* FRECPS */
8851 gen_helper_recpsf_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8853 case 0x18: /* FMAXNM */
8854 gen_helper_vfp_maxnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8856 case 0x38: /* FMINNM */
8857 gen_helper_vfp_minnums(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8859 case 0x3a: /* FSUB */
8860 gen_helper_vfp_subs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8862 case 0x3e: /* FMIN */
8863 gen_helper_vfp_mins(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8865 case 0x3f: /* FRSQRTS */
8866 gen_helper_rsqrtsf_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8868 case 0x5b: /* FMUL */
8869 gen_helper_vfp_muls(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8871 case 0x5c: /* FCMGE */
8872 gen_helper_neon_cge_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8874 case 0x5d: /* FACGE */
8875 gen_helper_neon_acge_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8877 case 0x5f: /* FDIV */
8878 gen_helper_vfp_divs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8880 case 0x7a: /* FABD */
8881 gen_helper_vfp_subs(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8882 gen_helper_vfp_abss(tcg_res
, tcg_res
);
8884 case 0x7c: /* FCMGT */
8885 gen_helper_neon_cgt_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8887 case 0x7d: /* FACGT */
8888 gen_helper_neon_acgt_f32(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
8891 g_assert_not_reached();
8894 if (elements
== 1) {
8895 /* scalar single so clear high part */
8896 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
8898 tcg_gen_extu_i32_i64(tcg_tmp
, tcg_res
);
8899 write_vec_element(s
, tcg_tmp
, rd
, pass
, MO_64
);
8901 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
8906 clear_vec_high(s
, elements
* (size
? 8 : 4) > 8, rd
);
8909 /* AdvSIMD scalar three same
8910 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
8911 * +-----+---+-----------+------+---+------+--------+---+------+------+
8912 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
8913 * +-----+---+-----------+------+---+------+--------+---+------+------+
8915 static void disas_simd_scalar_three_reg_same(DisasContext
*s
, uint32_t insn
)
8917 int rd
= extract32(insn
, 0, 5);
8918 int rn
= extract32(insn
, 5, 5);
8919 int opcode
= extract32(insn
, 11, 5);
8920 int rm
= extract32(insn
, 16, 5);
8921 int size
= extract32(insn
, 22, 2);
8922 bool u
= extract32(insn
, 29, 1);
8925 if (opcode
>= 0x18) {
8926 /* Floating point: U, size[1] and opcode indicate operation */
8927 int fpopcode
= opcode
| (extract32(size
, 1, 1) << 5) | (u
<< 6);
8929 case 0x1b: /* FMULX */
8930 case 0x1f: /* FRECPS */
8931 case 0x3f: /* FRSQRTS */
8932 case 0x5d: /* FACGE */
8933 case 0x7d: /* FACGT */
8934 case 0x1c: /* FCMEQ */
8935 case 0x5c: /* FCMGE */
8936 case 0x7c: /* FCMGT */
8937 case 0x7a: /* FABD */
8940 unallocated_encoding(s
);
8944 if (!fp_access_check(s
)) {
8948 handle_3same_float(s
, extract32(size
, 0, 1), 1, fpopcode
, rd
, rn
, rm
);
8953 case 0x1: /* SQADD, UQADD */
8954 case 0x5: /* SQSUB, UQSUB */
8955 case 0x9: /* SQSHL, UQSHL */
8956 case 0xb: /* SQRSHL, UQRSHL */
8958 case 0x8: /* SSHL, USHL */
8959 case 0xa: /* SRSHL, URSHL */
8960 case 0x6: /* CMGT, CMHI */
8961 case 0x7: /* CMGE, CMHS */
8962 case 0x11: /* CMTST, CMEQ */
8963 case 0x10: /* ADD, SUB (vector) */
8965 unallocated_encoding(s
);
8969 case 0x16: /* SQDMULH, SQRDMULH (vector) */
8970 if (size
!= 1 && size
!= 2) {
8971 unallocated_encoding(s
);
8976 unallocated_encoding(s
);
8980 if (!fp_access_check(s
)) {
8984 tcg_rd
= tcg_temp_new_i64();
8987 TCGv_i64 tcg_rn
= read_fp_dreg(s
, rn
);
8988 TCGv_i64 tcg_rm
= read_fp_dreg(s
, rm
);
8990 handle_3same_64(s
, opcode
, u
, tcg_rd
, tcg_rn
, tcg_rm
);
8992 /* Do a single operation on the lowest element in the vector.
8993 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
8994 * no side effects for all these operations.
8995 * OPTME: special-purpose helpers would avoid doing some
8996 * unnecessary work in the helper for the 8 and 16 bit cases.
8998 NeonGenTwoOpEnvFn
*genenvfn
;
8999 TCGv_i32 tcg_rn
= tcg_temp_new_i32();
9000 TCGv_i32 tcg_rm
= tcg_temp_new_i32();
9001 TCGv_i32 tcg_rd32
= tcg_temp_new_i32();
9003 read_vec_element_i32(s
, tcg_rn
, rn
, 0, size
);
9004 read_vec_element_i32(s
, tcg_rm
, rm
, 0, size
);
9007 case 0x1: /* SQADD, UQADD */
9009 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
9010 { gen_helper_neon_qadd_s8
, gen_helper_neon_qadd_u8
},
9011 { gen_helper_neon_qadd_s16
, gen_helper_neon_qadd_u16
},
9012 { gen_helper_neon_qadd_s32
, gen_helper_neon_qadd_u32
},
9014 genenvfn
= fns
[size
][u
];
9017 case 0x5: /* SQSUB, UQSUB */
9019 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
9020 { gen_helper_neon_qsub_s8
, gen_helper_neon_qsub_u8
},
9021 { gen_helper_neon_qsub_s16
, gen_helper_neon_qsub_u16
},
9022 { gen_helper_neon_qsub_s32
, gen_helper_neon_qsub_u32
},
9024 genenvfn
= fns
[size
][u
];
9027 case 0x9: /* SQSHL, UQSHL */
9029 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
9030 { gen_helper_neon_qshl_s8
, gen_helper_neon_qshl_u8
},
9031 { gen_helper_neon_qshl_s16
, gen_helper_neon_qshl_u16
},
9032 { gen_helper_neon_qshl_s32
, gen_helper_neon_qshl_u32
},
9034 genenvfn
= fns
[size
][u
];
9037 case 0xb: /* SQRSHL, UQRSHL */
9039 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
9040 { gen_helper_neon_qrshl_s8
, gen_helper_neon_qrshl_u8
},
9041 { gen_helper_neon_qrshl_s16
, gen_helper_neon_qrshl_u16
},
9042 { gen_helper_neon_qrshl_s32
, gen_helper_neon_qrshl_u32
},
9044 genenvfn
= fns
[size
][u
];
9047 case 0x16: /* SQDMULH, SQRDMULH */
9049 static NeonGenTwoOpEnvFn
* const fns
[2][2] = {
9050 { gen_helper_neon_qdmulh_s16
, gen_helper_neon_qrdmulh_s16
},
9051 { gen_helper_neon_qdmulh_s32
, gen_helper_neon_qrdmulh_s32
},
9053 assert(size
== 1 || size
== 2);
9054 genenvfn
= fns
[size
- 1][u
];
9058 g_assert_not_reached();
9061 genenvfn(tcg_rd32
, cpu_env
, tcg_rn
, tcg_rm
);
9062 tcg_gen_extu_i32_i64(tcg_rd
, tcg_rd32
);
9065 write_fp_dreg(s
, rd
, tcg_rd
);
9068 /* AdvSIMD scalar three same FP16
9069 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
9070 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9071 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
9072 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9073 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9074 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9076 static void disas_simd_scalar_three_reg_same_fp16(DisasContext
*s
,
9079 int rd
= extract32(insn
, 0, 5);
9080 int rn
= extract32(insn
, 5, 5);
9081 int opcode
= extract32(insn
, 11, 3);
9082 int rm
= extract32(insn
, 16, 5);
9083 bool u
= extract32(insn
, 29, 1);
9084 bool a
= extract32(insn
, 23, 1);
9085 int fpopcode
= opcode
| (a
<< 3) | (u
<< 4);
9092 case 0x03: /* FMULX */
9093 case 0x04: /* FCMEQ (reg) */
9094 case 0x07: /* FRECPS */
9095 case 0x0f: /* FRSQRTS */
9096 case 0x14: /* FCMGE (reg) */
9097 case 0x15: /* FACGE */
9098 case 0x1a: /* FABD */
9099 case 0x1c: /* FCMGT (reg) */
9100 case 0x1d: /* FACGT */
9103 unallocated_encoding(s
);
9107 if (!dc_isar_feature(aa64_fp16
, s
)) {
9108 unallocated_encoding(s
);
9111 if (!fp_access_check(s
)) {
9115 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
9117 tcg_op1
= read_fp_hreg(s
, rn
);
9118 tcg_op2
= read_fp_hreg(s
, rm
);
9119 tcg_res
= tcg_temp_new_i32();
9122 case 0x03: /* FMULX */
9123 gen_helper_advsimd_mulxh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9125 case 0x04: /* FCMEQ (reg) */
9126 gen_helper_advsimd_ceq_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9128 case 0x07: /* FRECPS */
9129 gen_helper_recpsf_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9131 case 0x0f: /* FRSQRTS */
9132 gen_helper_rsqrtsf_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9134 case 0x14: /* FCMGE (reg) */
9135 gen_helper_advsimd_cge_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9137 case 0x15: /* FACGE */
9138 gen_helper_advsimd_acge_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9140 case 0x1a: /* FABD */
9141 gen_helper_advsimd_subh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9142 tcg_gen_andi_i32(tcg_res
, tcg_res
, 0x7fff);
9144 case 0x1c: /* FCMGT (reg) */
9145 gen_helper_advsimd_cgt_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9147 case 0x1d: /* FACGT */
9148 gen_helper_advsimd_acgt_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
9151 g_assert_not_reached();
9154 write_fp_sreg(s
, rd
, tcg_res
);
9157 /* AdvSIMD scalar three same extra
9158 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
9159 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9160 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
9161 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9163 static void disas_simd_scalar_three_reg_same_extra(DisasContext
*s
,
9166 int rd
= extract32(insn
, 0, 5);
9167 int rn
= extract32(insn
, 5, 5);
9168 int opcode
= extract32(insn
, 11, 4);
9169 int rm
= extract32(insn
, 16, 5);
9170 int size
= extract32(insn
, 22, 2);
9171 bool u
= extract32(insn
, 29, 1);
9172 TCGv_i32 ele1
, ele2
, ele3
;
9176 switch (u
* 16 + opcode
) {
9177 case 0x10: /* SQRDMLAH (vector) */
9178 case 0x11: /* SQRDMLSH (vector) */
9179 if (size
!= 1 && size
!= 2) {
9180 unallocated_encoding(s
);
9183 feature
= dc_isar_feature(aa64_rdm
, s
);
9186 unallocated_encoding(s
);
9190 unallocated_encoding(s
);
9193 if (!fp_access_check(s
)) {
9197 /* Do a single operation on the lowest element in the vector.
9198 * We use the standard Neon helpers and rely on 0 OP 0 == 0
9199 * with no side effects for all these operations.
9200 * OPTME: special-purpose helpers would avoid doing some
9201 * unnecessary work in the helper for the 16 bit cases.
9203 ele1
= tcg_temp_new_i32();
9204 ele2
= tcg_temp_new_i32();
9205 ele3
= tcg_temp_new_i32();
9207 read_vec_element_i32(s
, ele1
, rn
, 0, size
);
9208 read_vec_element_i32(s
, ele2
, rm
, 0, size
);
9209 read_vec_element_i32(s
, ele3
, rd
, 0, size
);
9212 case 0x0: /* SQRDMLAH */
9214 gen_helper_neon_qrdmlah_s16(ele3
, cpu_env
, ele1
, ele2
, ele3
);
9216 gen_helper_neon_qrdmlah_s32(ele3
, cpu_env
, ele1
, ele2
, ele3
);
9219 case 0x1: /* SQRDMLSH */
9221 gen_helper_neon_qrdmlsh_s16(ele3
, cpu_env
, ele1
, ele2
, ele3
);
9223 gen_helper_neon_qrdmlsh_s32(ele3
, cpu_env
, ele1
, ele2
, ele3
);
9227 g_assert_not_reached();
9230 res
= tcg_temp_new_i64();
9231 tcg_gen_extu_i32_i64(res
, ele3
);
9232 write_fp_dreg(s
, rd
, res
);
9235 static void handle_2misc_64(DisasContext
*s
, int opcode
, bool u
,
9236 TCGv_i64 tcg_rd
, TCGv_i64 tcg_rn
,
9237 TCGv_i32 tcg_rmode
, TCGv_ptr tcg_fpstatus
)
9239 /* Handle 64->64 opcodes which are shared between the scalar and
9240 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9241 * is valid in either group and also the double-precision fp ops.
9242 * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9248 case 0x4: /* CLS, CLZ */
9250 tcg_gen_clzi_i64(tcg_rd
, tcg_rn
, 64);
9252 tcg_gen_clrsb_i64(tcg_rd
, tcg_rn
);
9256 /* This opcode is shared with CNT and RBIT but we have earlier
9257 * enforced that size == 3 if and only if this is the NOT insn.
9259 tcg_gen_not_i64(tcg_rd
, tcg_rn
);
9261 case 0x7: /* SQABS, SQNEG */
9263 gen_helper_neon_qneg_s64(tcg_rd
, cpu_env
, tcg_rn
);
9265 gen_helper_neon_qabs_s64(tcg_rd
, cpu_env
, tcg_rn
);
9268 case 0xa: /* CMLT */
9271 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9272 tcg_gen_negsetcond_i64(cond
, tcg_rd
, tcg_rn
, tcg_constant_i64(0));
9274 case 0x8: /* CMGT, CMGE */
9275 cond
= u
? TCG_COND_GE
: TCG_COND_GT
;
9277 case 0x9: /* CMEQ, CMLE */
9278 cond
= u
? TCG_COND_LE
: TCG_COND_EQ
;
9280 case 0xb: /* ABS, NEG */
9282 tcg_gen_neg_i64(tcg_rd
, tcg_rn
);
9284 tcg_gen_abs_i64(tcg_rd
, tcg_rn
);
9287 case 0x2f: /* FABS */
9288 gen_helper_vfp_absd(tcg_rd
, tcg_rn
);
9290 case 0x6f: /* FNEG */
9291 gen_helper_vfp_negd(tcg_rd
, tcg_rn
);
9293 case 0x7f: /* FSQRT */
9294 gen_helper_vfp_sqrtd(tcg_rd
, tcg_rn
, cpu_env
);
9296 case 0x1a: /* FCVTNS */
9297 case 0x1b: /* FCVTMS */
9298 case 0x1c: /* FCVTAS */
9299 case 0x3a: /* FCVTPS */
9300 case 0x3b: /* FCVTZS */
9301 gen_helper_vfp_tosqd(tcg_rd
, tcg_rn
, tcg_constant_i32(0), tcg_fpstatus
);
9303 case 0x5a: /* FCVTNU */
9304 case 0x5b: /* FCVTMU */
9305 case 0x5c: /* FCVTAU */
9306 case 0x7a: /* FCVTPU */
9307 case 0x7b: /* FCVTZU */
9308 gen_helper_vfp_touqd(tcg_rd
, tcg_rn
, tcg_constant_i32(0), tcg_fpstatus
);
9310 case 0x18: /* FRINTN */
9311 case 0x19: /* FRINTM */
9312 case 0x38: /* FRINTP */
9313 case 0x39: /* FRINTZ */
9314 case 0x58: /* FRINTA */
9315 case 0x79: /* FRINTI */
9316 gen_helper_rintd(tcg_rd
, tcg_rn
, tcg_fpstatus
);
9318 case 0x59: /* FRINTX */
9319 gen_helper_rintd_exact(tcg_rd
, tcg_rn
, tcg_fpstatus
);
9321 case 0x1e: /* FRINT32Z */
9322 case 0x5e: /* FRINT32X */
9323 gen_helper_frint32_d(tcg_rd
, tcg_rn
, tcg_fpstatus
);
9325 case 0x1f: /* FRINT64Z */
9326 case 0x5f: /* FRINT64X */
9327 gen_helper_frint64_d(tcg_rd
, tcg_rn
, tcg_fpstatus
);
9330 g_assert_not_reached();
9334 static void handle_2misc_fcmp_zero(DisasContext
*s
, int opcode
,
9335 bool is_scalar
, bool is_u
, bool is_q
,
9336 int size
, int rn
, int rd
)
9338 bool is_double
= (size
== MO_64
);
9341 if (!fp_access_check(s
)) {
9345 fpst
= fpstatus_ptr(size
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
9348 TCGv_i64 tcg_op
= tcg_temp_new_i64();
9349 TCGv_i64 tcg_zero
= tcg_constant_i64(0);
9350 TCGv_i64 tcg_res
= tcg_temp_new_i64();
9351 NeonGenTwoDoubleOpFn
*genfn
;
9356 case 0x2e: /* FCMLT (zero) */
9359 case 0x2c: /* FCMGT (zero) */
9360 genfn
= gen_helper_neon_cgt_f64
;
9362 case 0x2d: /* FCMEQ (zero) */
9363 genfn
= gen_helper_neon_ceq_f64
;
9365 case 0x6d: /* FCMLE (zero) */
9368 case 0x6c: /* FCMGE (zero) */
9369 genfn
= gen_helper_neon_cge_f64
;
9372 g_assert_not_reached();
9375 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
9376 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
9378 genfn(tcg_res
, tcg_zero
, tcg_op
, fpst
);
9380 genfn(tcg_res
, tcg_op
, tcg_zero
, fpst
);
9382 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
9385 clear_vec_high(s
, !is_scalar
, rd
);
9387 TCGv_i32 tcg_op
= tcg_temp_new_i32();
9388 TCGv_i32 tcg_zero
= tcg_constant_i32(0);
9389 TCGv_i32 tcg_res
= tcg_temp_new_i32();
9390 NeonGenTwoSingleOpFn
*genfn
;
9392 int pass
, maxpasses
;
9394 if (size
== MO_16
) {
9396 case 0x2e: /* FCMLT (zero) */
9399 case 0x2c: /* FCMGT (zero) */
9400 genfn
= gen_helper_advsimd_cgt_f16
;
9402 case 0x2d: /* FCMEQ (zero) */
9403 genfn
= gen_helper_advsimd_ceq_f16
;
9405 case 0x6d: /* FCMLE (zero) */
9408 case 0x6c: /* FCMGE (zero) */
9409 genfn
= gen_helper_advsimd_cge_f16
;
9412 g_assert_not_reached();
9416 case 0x2e: /* FCMLT (zero) */
9419 case 0x2c: /* FCMGT (zero) */
9420 genfn
= gen_helper_neon_cgt_f32
;
9422 case 0x2d: /* FCMEQ (zero) */
9423 genfn
= gen_helper_neon_ceq_f32
;
9425 case 0x6d: /* FCMLE (zero) */
9428 case 0x6c: /* FCMGE (zero) */
9429 genfn
= gen_helper_neon_cge_f32
;
9432 g_assert_not_reached();
9439 int vector_size
= 8 << is_q
;
9440 maxpasses
= vector_size
>> size
;
9443 for (pass
= 0; pass
< maxpasses
; pass
++) {
9444 read_vec_element_i32(s
, tcg_op
, rn
, pass
, size
);
9446 genfn(tcg_res
, tcg_zero
, tcg_op
, fpst
);
9448 genfn(tcg_res
, tcg_op
, tcg_zero
, fpst
);
9451 write_fp_sreg(s
, rd
, tcg_res
);
9453 write_vec_element_i32(s
, tcg_res
, rd
, pass
, size
);
9458 clear_vec_high(s
, is_q
, rd
);
9463 static void handle_2misc_reciprocal(DisasContext
*s
, int opcode
,
9464 bool is_scalar
, bool is_u
, bool is_q
,
9465 int size
, int rn
, int rd
)
9467 bool is_double
= (size
== 3);
9468 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
9471 TCGv_i64 tcg_op
= tcg_temp_new_i64();
9472 TCGv_i64 tcg_res
= tcg_temp_new_i64();
9475 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
9476 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
9478 case 0x3d: /* FRECPE */
9479 gen_helper_recpe_f64(tcg_res
, tcg_op
, fpst
);
9481 case 0x3f: /* FRECPX */
9482 gen_helper_frecpx_f64(tcg_res
, tcg_op
, fpst
);
9484 case 0x7d: /* FRSQRTE */
9485 gen_helper_rsqrte_f64(tcg_res
, tcg_op
, fpst
);
9488 g_assert_not_reached();
9490 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
9492 clear_vec_high(s
, !is_scalar
, rd
);
9494 TCGv_i32 tcg_op
= tcg_temp_new_i32();
9495 TCGv_i32 tcg_res
= tcg_temp_new_i32();
9496 int pass
, maxpasses
;
9501 maxpasses
= is_q
? 4 : 2;
9504 for (pass
= 0; pass
< maxpasses
; pass
++) {
9505 read_vec_element_i32(s
, tcg_op
, rn
, pass
, MO_32
);
9508 case 0x3c: /* URECPE */
9509 gen_helper_recpe_u32(tcg_res
, tcg_op
);
9511 case 0x3d: /* FRECPE */
9512 gen_helper_recpe_f32(tcg_res
, tcg_op
, fpst
);
9514 case 0x3f: /* FRECPX */
9515 gen_helper_frecpx_f32(tcg_res
, tcg_op
, fpst
);
9517 case 0x7d: /* FRSQRTE */
9518 gen_helper_rsqrte_f32(tcg_res
, tcg_op
, fpst
);
9521 g_assert_not_reached();
9525 write_fp_sreg(s
, rd
, tcg_res
);
9527 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
9531 clear_vec_high(s
, is_q
, rd
);
9536 static void handle_2misc_narrow(DisasContext
*s
, bool scalar
,
9537 int opcode
, bool u
, bool is_q
,
9538 int size
, int rn
, int rd
)
9540 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9541 * in the source becomes a size element in the destination).
9544 TCGv_i32 tcg_res
[2];
9545 int destelt
= is_q
? 2 : 0;
9546 int passes
= scalar
? 1 : 2;
9549 tcg_res
[1] = tcg_constant_i32(0);
9552 for (pass
= 0; pass
< passes
; pass
++) {
9553 TCGv_i64 tcg_op
= tcg_temp_new_i64();
9554 NeonGenNarrowFn
*genfn
= NULL
;
9555 NeonGenNarrowEnvFn
*genenvfn
= NULL
;
9558 read_vec_element(s
, tcg_op
, rn
, pass
, size
+ 1);
9560 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
9562 tcg_res
[pass
] = tcg_temp_new_i32();
9565 case 0x12: /* XTN, SQXTUN */
9567 static NeonGenNarrowFn
* const xtnfns
[3] = {
9568 gen_helper_neon_narrow_u8
,
9569 gen_helper_neon_narrow_u16
,
9570 tcg_gen_extrl_i64_i32
,
9572 static NeonGenNarrowEnvFn
* const sqxtunfns
[3] = {
9573 gen_helper_neon_unarrow_sat8
,
9574 gen_helper_neon_unarrow_sat16
,
9575 gen_helper_neon_unarrow_sat32
,
9578 genenvfn
= sqxtunfns
[size
];
9580 genfn
= xtnfns
[size
];
9584 case 0x14: /* SQXTN, UQXTN */
9586 static NeonGenNarrowEnvFn
* const fns
[3][2] = {
9587 { gen_helper_neon_narrow_sat_s8
,
9588 gen_helper_neon_narrow_sat_u8
},
9589 { gen_helper_neon_narrow_sat_s16
,
9590 gen_helper_neon_narrow_sat_u16
},
9591 { gen_helper_neon_narrow_sat_s32
,
9592 gen_helper_neon_narrow_sat_u32
},
9594 genenvfn
= fns
[size
][u
];
9597 case 0x16: /* FCVTN, FCVTN2 */
9598 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9600 gen_helper_vfp_fcvtsd(tcg_res
[pass
], tcg_op
, cpu_env
);
9602 TCGv_i32 tcg_lo
= tcg_temp_new_i32();
9603 TCGv_i32 tcg_hi
= tcg_temp_new_i32();
9604 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
9605 TCGv_i32 ahp
= get_ahp_flag();
9607 tcg_gen_extr_i64_i32(tcg_lo
, tcg_hi
, tcg_op
);
9608 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo
, tcg_lo
, fpst
, ahp
);
9609 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi
, tcg_hi
, fpst
, ahp
);
9610 tcg_gen_deposit_i32(tcg_res
[pass
], tcg_lo
, tcg_hi
, 16, 16);
9613 case 0x36: /* BFCVTN, BFCVTN2 */
9615 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
9616 gen_helper_bfcvt_pair(tcg_res
[pass
], tcg_op
, fpst
);
9619 case 0x56: /* FCVTXN, FCVTXN2 */
9620 /* 64 bit to 32 bit float conversion
9621 * with von Neumann rounding (round to odd)
9624 gen_helper_fcvtx_f64_to_f32(tcg_res
[pass
], tcg_op
, cpu_env
);
9627 g_assert_not_reached();
9631 genfn(tcg_res
[pass
], tcg_op
);
9632 } else if (genenvfn
) {
9633 genenvfn(tcg_res
[pass
], cpu_env
, tcg_op
);
9637 for (pass
= 0; pass
< 2; pass
++) {
9638 write_vec_element_i32(s
, tcg_res
[pass
], rd
, destelt
+ pass
, MO_32
);
9640 clear_vec_high(s
, is_q
, rd
);
9643 /* Remaining saturating accumulating ops */
9644 static void handle_2misc_satacc(DisasContext
*s
, bool is_scalar
, bool is_u
,
9645 bool is_q
, int size
, int rn
, int rd
)
9647 bool is_double
= (size
== 3);
9650 TCGv_i64 tcg_rn
= tcg_temp_new_i64();
9651 TCGv_i64 tcg_rd
= tcg_temp_new_i64();
9654 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
9655 read_vec_element(s
, tcg_rn
, rn
, pass
, MO_64
);
9656 read_vec_element(s
, tcg_rd
, rd
, pass
, MO_64
);
9658 if (is_u
) { /* USQADD */
9659 gen_helper_neon_uqadd_s64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9660 } else { /* SUQADD */
9661 gen_helper_neon_sqadd_u64(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9663 write_vec_element(s
, tcg_rd
, rd
, pass
, MO_64
);
9665 clear_vec_high(s
, !is_scalar
, rd
);
9667 TCGv_i32 tcg_rn
= tcg_temp_new_i32();
9668 TCGv_i32 tcg_rd
= tcg_temp_new_i32();
9669 int pass
, maxpasses
;
9674 maxpasses
= is_q
? 4 : 2;
9677 for (pass
= 0; pass
< maxpasses
; pass
++) {
9679 read_vec_element_i32(s
, tcg_rn
, rn
, pass
, size
);
9680 read_vec_element_i32(s
, tcg_rd
, rd
, pass
, size
);
9682 read_vec_element_i32(s
, tcg_rn
, rn
, pass
, MO_32
);
9683 read_vec_element_i32(s
, tcg_rd
, rd
, pass
, MO_32
);
9686 if (is_u
) { /* USQADD */
9689 gen_helper_neon_uqadd_s8(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9692 gen_helper_neon_uqadd_s16(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9695 gen_helper_neon_uqadd_s32(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9698 g_assert_not_reached();
9700 } else { /* SUQADD */
9703 gen_helper_neon_sqadd_u8(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9706 gen_helper_neon_sqadd_u16(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9709 gen_helper_neon_sqadd_u32(tcg_rd
, cpu_env
, tcg_rn
, tcg_rd
);
9712 g_assert_not_reached();
9717 write_vec_element(s
, tcg_constant_i64(0), rd
, 0, MO_64
);
9719 write_vec_element_i32(s
, tcg_rd
, rd
, pass
, MO_32
);
9721 clear_vec_high(s
, is_q
, rd
);
9725 /* AdvSIMD scalar two reg misc
9726 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9727 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9728 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9729 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9731 static void disas_simd_scalar_two_reg_misc(DisasContext
*s
, uint32_t insn
)
9733 int rd
= extract32(insn
, 0, 5);
9734 int rn
= extract32(insn
, 5, 5);
9735 int opcode
= extract32(insn
, 12, 5);
9736 int size
= extract32(insn
, 22, 2);
9737 bool u
= extract32(insn
, 29, 1);
9738 bool is_fcvt
= false;
9741 TCGv_ptr tcg_fpstatus
;
9744 case 0x3: /* USQADD / SUQADD*/
9745 if (!fp_access_check(s
)) {
9748 handle_2misc_satacc(s
, true, u
, false, size
, rn
, rd
);
9750 case 0x7: /* SQABS / SQNEG */
9752 case 0xa: /* CMLT */
9754 unallocated_encoding(s
);
9758 case 0x8: /* CMGT, CMGE */
9759 case 0x9: /* CMEQ, CMLE */
9760 case 0xb: /* ABS, NEG */
9762 unallocated_encoding(s
);
9766 case 0x12: /* SQXTUN */
9768 unallocated_encoding(s
);
9772 case 0x14: /* SQXTN, UQXTN */
9774 unallocated_encoding(s
);
9777 if (!fp_access_check(s
)) {
9780 handle_2misc_narrow(s
, true, opcode
, u
, false, size
, rn
, rd
);
9785 /* Floating point: U, size[1] and opcode indicate operation;
9786 * size[0] indicates single or double precision.
9788 opcode
|= (extract32(size
, 1, 1) << 5) | (u
<< 6);
9789 size
= extract32(size
, 0, 1) ? 3 : 2;
9791 case 0x2c: /* FCMGT (zero) */
9792 case 0x2d: /* FCMEQ (zero) */
9793 case 0x2e: /* FCMLT (zero) */
9794 case 0x6c: /* FCMGE (zero) */
9795 case 0x6d: /* FCMLE (zero) */
9796 handle_2misc_fcmp_zero(s
, opcode
, true, u
, true, size
, rn
, rd
);
9798 case 0x1d: /* SCVTF */
9799 case 0x5d: /* UCVTF */
9801 bool is_signed
= (opcode
== 0x1d);
9802 if (!fp_access_check(s
)) {
9805 handle_simd_intfp_conv(s
, rd
, rn
, 1, is_signed
, 0, size
);
9808 case 0x3d: /* FRECPE */
9809 case 0x3f: /* FRECPX */
9810 case 0x7d: /* FRSQRTE */
9811 if (!fp_access_check(s
)) {
9814 handle_2misc_reciprocal(s
, opcode
, true, u
, true, size
, rn
, rd
);
9816 case 0x1a: /* FCVTNS */
9817 case 0x1b: /* FCVTMS */
9818 case 0x3a: /* FCVTPS */
9819 case 0x3b: /* FCVTZS */
9820 case 0x5a: /* FCVTNU */
9821 case 0x5b: /* FCVTMU */
9822 case 0x7a: /* FCVTPU */
9823 case 0x7b: /* FCVTZU */
9825 rmode
= extract32(opcode
, 5, 1) | (extract32(opcode
, 0, 1) << 1);
9827 case 0x1c: /* FCVTAS */
9828 case 0x5c: /* FCVTAU */
9829 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9831 rmode
= FPROUNDING_TIEAWAY
;
9833 case 0x56: /* FCVTXN, FCVTXN2 */
9835 unallocated_encoding(s
);
9838 if (!fp_access_check(s
)) {
9841 handle_2misc_narrow(s
, true, opcode
, u
, false, size
- 1, rn
, rd
);
9844 unallocated_encoding(s
);
9849 unallocated_encoding(s
);
9853 if (!fp_access_check(s
)) {
9858 tcg_fpstatus
= fpstatus_ptr(FPST_FPCR
);
9859 tcg_rmode
= gen_set_rmode(rmode
, tcg_fpstatus
);
9861 tcg_fpstatus
= NULL
;
9866 TCGv_i64 tcg_rn
= read_fp_dreg(s
, rn
);
9867 TCGv_i64 tcg_rd
= tcg_temp_new_i64();
9869 handle_2misc_64(s
, opcode
, u
, tcg_rd
, tcg_rn
, tcg_rmode
, tcg_fpstatus
);
9870 write_fp_dreg(s
, rd
, tcg_rd
);
9872 TCGv_i32 tcg_rn
= tcg_temp_new_i32();
9873 TCGv_i32 tcg_rd
= tcg_temp_new_i32();
9875 read_vec_element_i32(s
, tcg_rn
, rn
, 0, size
);
9878 case 0x7: /* SQABS, SQNEG */
9880 NeonGenOneOpEnvFn
*genfn
;
9881 static NeonGenOneOpEnvFn
* const fns
[3][2] = {
9882 { gen_helper_neon_qabs_s8
, gen_helper_neon_qneg_s8
},
9883 { gen_helper_neon_qabs_s16
, gen_helper_neon_qneg_s16
},
9884 { gen_helper_neon_qabs_s32
, gen_helper_neon_qneg_s32
},
9886 genfn
= fns
[size
][u
];
9887 genfn(tcg_rd
, cpu_env
, tcg_rn
);
9890 case 0x1a: /* FCVTNS */
9891 case 0x1b: /* FCVTMS */
9892 case 0x1c: /* FCVTAS */
9893 case 0x3a: /* FCVTPS */
9894 case 0x3b: /* FCVTZS */
9895 gen_helper_vfp_tosls(tcg_rd
, tcg_rn
, tcg_constant_i32(0),
9898 case 0x5a: /* FCVTNU */
9899 case 0x5b: /* FCVTMU */
9900 case 0x5c: /* FCVTAU */
9901 case 0x7a: /* FCVTPU */
9902 case 0x7b: /* FCVTZU */
9903 gen_helper_vfp_touls(tcg_rd
, tcg_rn
, tcg_constant_i32(0),
9907 g_assert_not_reached();
9910 write_fp_sreg(s
, rd
, tcg_rd
);
9914 gen_restore_rmode(tcg_rmode
, tcg_fpstatus
);
9918 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9919 static void handle_vec_simd_shri(DisasContext
*s
, bool is_q
, bool is_u
,
9920 int immh
, int immb
, int opcode
, int rn
, int rd
)
9922 int size
= 32 - clz32(immh
) - 1;
9923 int immhb
= immh
<< 3 | immb
;
9924 int shift
= 2 * (8 << size
) - immhb
;
9925 GVecGen2iFn
*gvec_fn
;
9927 if (extract32(immh
, 3, 1) && !is_q
) {
9928 unallocated_encoding(s
);
9931 tcg_debug_assert(size
<= 3);
9933 if (!fp_access_check(s
)) {
9938 case 0x02: /* SSRA / USRA (accumulate) */
9939 gvec_fn
= is_u
? gen_gvec_usra
: gen_gvec_ssra
;
9942 case 0x08: /* SRI */
9943 gvec_fn
= gen_gvec_sri
;
9946 case 0x00: /* SSHR / USHR */
9948 if (shift
== 8 << size
) {
9949 /* Shift count the same size as element size produces zero. */
9950 tcg_gen_gvec_dup_imm(size
, vec_full_reg_offset(s
, rd
),
9951 is_q
? 16 : 8, vec_full_reg_size(s
), 0);
9954 gvec_fn
= tcg_gen_gvec_shri
;
9956 /* Shift count the same size as element size produces all sign. */
9957 if (shift
== 8 << size
) {
9960 gvec_fn
= tcg_gen_gvec_sari
;
9964 case 0x04: /* SRSHR / URSHR (rounding) */
9965 gvec_fn
= is_u
? gen_gvec_urshr
: gen_gvec_srshr
;
9968 case 0x06: /* SRSRA / URSRA (accum + rounding) */
9969 gvec_fn
= is_u
? gen_gvec_ursra
: gen_gvec_srsra
;
9973 g_assert_not_reached();
9976 gen_gvec_fn2i(s
, is_q
, rd
, rn
, shift
, gvec_fn
, size
);
9979 /* SHL/SLI - Vector shift left */
9980 static void handle_vec_simd_shli(DisasContext
*s
, bool is_q
, bool insert
,
9981 int immh
, int immb
, int opcode
, int rn
, int rd
)
9983 int size
= 32 - clz32(immh
) - 1;
9984 int immhb
= immh
<< 3 | immb
;
9985 int shift
= immhb
- (8 << size
);
9987 /* Range of size is limited by decode: immh is a non-zero 4 bit field */
9988 assert(size
>= 0 && size
<= 3);
9990 if (extract32(immh
, 3, 1) && !is_q
) {
9991 unallocated_encoding(s
);
9995 if (!fp_access_check(s
)) {
10000 gen_gvec_fn2i(s
, is_q
, rd
, rn
, shift
, gen_gvec_sli
, size
);
10002 gen_gvec_fn2i(s
, is_q
, rd
, rn
, shift
, tcg_gen_gvec_shli
, size
);
10006 /* USHLL/SHLL - Vector shift left with widening */
10007 static void handle_vec_simd_wshli(DisasContext
*s
, bool is_q
, bool is_u
,
10008 int immh
, int immb
, int opcode
, int rn
, int rd
)
10010 int size
= 32 - clz32(immh
) - 1;
10011 int immhb
= immh
<< 3 | immb
;
10012 int shift
= immhb
- (8 << size
);
10014 int esize
= 8 << size
;
10015 int elements
= dsize
/esize
;
10016 TCGv_i64 tcg_rn
= tcg_temp_new_i64();
10017 TCGv_i64 tcg_rd
= tcg_temp_new_i64();
10021 unallocated_encoding(s
);
10025 if (!fp_access_check(s
)) {
10029 /* For the LL variants the store is larger than the load,
10030 * so if rd == rn we would overwrite parts of our input.
10031 * So load everything right now and use shifts in the main loop.
10033 read_vec_element(s
, tcg_rn
, rn
, is_q
? 1 : 0, MO_64
);
10035 for (i
= 0; i
< elements
; i
++) {
10036 tcg_gen_shri_i64(tcg_rd
, tcg_rn
, i
* esize
);
10037 ext_and_shift_reg(tcg_rd
, tcg_rd
, size
| (!is_u
<< 2), 0);
10038 tcg_gen_shli_i64(tcg_rd
, tcg_rd
, shift
);
10039 write_vec_element(s
, tcg_rd
, rd
, i
, size
+ 1);
10043 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10044 static void handle_vec_simd_shrn(DisasContext
*s
, bool is_q
,
10045 int immh
, int immb
, int opcode
, int rn
, int rd
)
10047 int immhb
= immh
<< 3 | immb
;
10048 int size
= 32 - clz32(immh
) - 1;
10050 int esize
= 8 << size
;
10051 int elements
= dsize
/esize
;
10052 int shift
= (2 * esize
) - immhb
;
10053 bool round
= extract32(opcode
, 0, 1);
10054 TCGv_i64 tcg_rn
, tcg_rd
, tcg_final
;
10055 TCGv_i64 tcg_round
;
10058 if (extract32(immh
, 3, 1)) {
10059 unallocated_encoding(s
);
10063 if (!fp_access_check(s
)) {
10067 tcg_rn
= tcg_temp_new_i64();
10068 tcg_rd
= tcg_temp_new_i64();
10069 tcg_final
= tcg_temp_new_i64();
10070 read_vec_element(s
, tcg_final
, rd
, is_q
? 1 : 0, MO_64
);
10073 tcg_round
= tcg_constant_i64(1ULL << (shift
- 1));
10078 for (i
= 0; i
< elements
; i
++) {
10079 read_vec_element(s
, tcg_rn
, rn
, i
, size
+1);
10080 handle_shri_with_rndacc(tcg_rd
, tcg_rn
, tcg_round
,
10081 false, true, size
+1, shift
);
10083 tcg_gen_deposit_i64(tcg_final
, tcg_final
, tcg_rd
, esize
* i
, esize
);
10087 write_vec_element(s
, tcg_final
, rd
, 0, MO_64
);
10089 write_vec_element(s
, tcg_final
, rd
, 1, MO_64
);
10092 clear_vec_high(s
, is_q
, rd
);
10096 /* AdvSIMD shift by immediate
10097 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
10098 * +---+---+---+-------------+------+------+--------+---+------+------+
10099 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
10100 * +---+---+---+-------------+------+------+--------+---+------+------+
10102 static void disas_simd_shift_imm(DisasContext
*s
, uint32_t insn
)
10104 int rd
= extract32(insn
, 0, 5);
10105 int rn
= extract32(insn
, 5, 5);
10106 int opcode
= extract32(insn
, 11, 5);
10107 int immb
= extract32(insn
, 16, 3);
10108 int immh
= extract32(insn
, 19, 4);
10109 bool is_u
= extract32(insn
, 29, 1);
10110 bool is_q
= extract32(insn
, 30, 1);
10112 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10116 case 0x08: /* SRI */
10118 unallocated_encoding(s
);
10122 case 0x00: /* SSHR / USHR */
10123 case 0x02: /* SSRA / USRA (accumulate) */
10124 case 0x04: /* SRSHR / URSHR (rounding) */
10125 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10126 handle_vec_simd_shri(s
, is_q
, is_u
, immh
, immb
, opcode
, rn
, rd
);
10128 case 0x0a: /* SHL / SLI */
10129 handle_vec_simd_shli(s
, is_q
, is_u
, immh
, immb
, opcode
, rn
, rd
);
10131 case 0x10: /* SHRN */
10132 case 0x11: /* RSHRN / SQRSHRUN */
10134 handle_vec_simd_sqshrn(s
, false, is_q
, false, true, immh
, immb
,
10137 handle_vec_simd_shrn(s
, is_q
, immh
, immb
, opcode
, rn
, rd
);
10140 case 0x12: /* SQSHRN / UQSHRN */
10141 case 0x13: /* SQRSHRN / UQRSHRN */
10142 handle_vec_simd_sqshrn(s
, false, is_q
, is_u
, is_u
, immh
, immb
,
10145 case 0x14: /* SSHLL / USHLL */
10146 handle_vec_simd_wshli(s
, is_q
, is_u
, immh
, immb
, opcode
, rn
, rd
);
10148 case 0x1c: /* SCVTF / UCVTF */
10149 handle_simd_shift_intfp_conv(s
, false, is_q
, is_u
, immh
, immb
,
10152 case 0xc: /* SQSHLU */
10154 unallocated_encoding(s
);
10157 handle_simd_qshl(s
, false, is_q
, false, true, immh
, immb
, rn
, rd
);
10159 case 0xe: /* SQSHL, UQSHL */
10160 handle_simd_qshl(s
, false, is_q
, is_u
, is_u
, immh
, immb
, rn
, rd
);
10162 case 0x1f: /* FCVTZS/ FCVTZU */
10163 handle_simd_shift_fpint_conv(s
, false, is_q
, is_u
, immh
, immb
, rn
, rd
);
10166 unallocated_encoding(s
);
10171 /* Generate code to do a "long" addition or subtraction, ie one done in
10172 * TCGv_i64 on vector lanes twice the width specified by size.
10174 static void gen_neon_addl(int size
, bool is_sub
, TCGv_i64 tcg_res
,
10175 TCGv_i64 tcg_op1
, TCGv_i64 tcg_op2
)
10177 static NeonGenTwo64OpFn
* const fns
[3][2] = {
10178 { gen_helper_neon_addl_u16
, gen_helper_neon_subl_u16
},
10179 { gen_helper_neon_addl_u32
, gen_helper_neon_subl_u32
},
10180 { tcg_gen_add_i64
, tcg_gen_sub_i64
},
10182 NeonGenTwo64OpFn
*genfn
;
10185 genfn
= fns
[size
][is_sub
];
10186 genfn(tcg_res
, tcg_op1
, tcg_op2
);
10189 static void handle_3rd_widening(DisasContext
*s
, int is_q
, int is_u
, int size
,
10190 int opcode
, int rd
, int rn
, int rm
)
10192 /* 3-reg-different widening insns: 64 x 64 -> 128 */
10193 TCGv_i64 tcg_res
[2];
10196 tcg_res
[0] = tcg_temp_new_i64();
10197 tcg_res
[1] = tcg_temp_new_i64();
10199 /* Does this op do an adding accumulate, a subtracting accumulate,
10200 * or no accumulate at all?
10218 read_vec_element(s
, tcg_res
[0], rd
, 0, MO_64
);
10219 read_vec_element(s
, tcg_res
[1], rd
, 1, MO_64
);
10222 /* size == 2 means two 32x32->64 operations; this is worth special
10223 * casing because we can generally handle it inline.
10226 for (pass
= 0; pass
< 2; pass
++) {
10227 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
10228 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
10229 TCGv_i64 tcg_passres
;
10230 MemOp memop
= MO_32
| (is_u
? 0 : MO_SIGN
);
10232 int elt
= pass
+ is_q
* 2;
10234 read_vec_element(s
, tcg_op1
, rn
, elt
, memop
);
10235 read_vec_element(s
, tcg_op2
, rm
, elt
, memop
);
10238 tcg_passres
= tcg_res
[pass
];
10240 tcg_passres
= tcg_temp_new_i64();
10244 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10245 tcg_gen_add_i64(tcg_passres
, tcg_op1
, tcg_op2
);
10247 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10248 tcg_gen_sub_i64(tcg_passres
, tcg_op1
, tcg_op2
);
10250 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10251 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10253 TCGv_i64 tcg_tmp1
= tcg_temp_new_i64();
10254 TCGv_i64 tcg_tmp2
= tcg_temp_new_i64();
10256 tcg_gen_sub_i64(tcg_tmp1
, tcg_op1
, tcg_op2
);
10257 tcg_gen_sub_i64(tcg_tmp2
, tcg_op2
, tcg_op1
);
10258 tcg_gen_movcond_i64(is_u
? TCG_COND_GEU
: TCG_COND_GE
,
10260 tcg_op1
, tcg_op2
, tcg_tmp1
, tcg_tmp2
);
10263 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10264 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10265 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10266 tcg_gen_mul_i64(tcg_passres
, tcg_op1
, tcg_op2
);
10268 case 9: /* SQDMLAL, SQDMLAL2 */
10269 case 11: /* SQDMLSL, SQDMLSL2 */
10270 case 13: /* SQDMULL, SQDMULL2 */
10271 tcg_gen_mul_i64(tcg_passres
, tcg_op1
, tcg_op2
);
10272 gen_helper_neon_addl_saturate_s64(tcg_passres
, cpu_env
,
10273 tcg_passres
, tcg_passres
);
10276 g_assert_not_reached();
10279 if (opcode
== 9 || opcode
== 11) {
10280 /* saturating accumulate ops */
10282 tcg_gen_neg_i64(tcg_passres
, tcg_passres
);
10284 gen_helper_neon_addl_saturate_s64(tcg_res
[pass
], cpu_env
,
10285 tcg_res
[pass
], tcg_passres
);
10286 } else if (accop
> 0) {
10287 tcg_gen_add_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_passres
);
10288 } else if (accop
< 0) {
10289 tcg_gen_sub_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_passres
);
10293 /* size 0 or 1, generally helper functions */
10294 for (pass
= 0; pass
< 2; pass
++) {
10295 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
10296 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
10297 TCGv_i64 tcg_passres
;
10298 int elt
= pass
+ is_q
* 2;
10300 read_vec_element_i32(s
, tcg_op1
, rn
, elt
, MO_32
);
10301 read_vec_element_i32(s
, tcg_op2
, rm
, elt
, MO_32
);
10304 tcg_passres
= tcg_res
[pass
];
10306 tcg_passres
= tcg_temp_new_i64();
10310 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10311 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10313 TCGv_i64 tcg_op2_64
= tcg_temp_new_i64();
10314 static NeonGenWidenFn
* const widenfns
[2][2] = {
10315 { gen_helper_neon_widen_s8
, gen_helper_neon_widen_u8
},
10316 { gen_helper_neon_widen_s16
, gen_helper_neon_widen_u16
},
10318 NeonGenWidenFn
*widenfn
= widenfns
[size
][is_u
];
10320 widenfn(tcg_op2_64
, tcg_op2
);
10321 widenfn(tcg_passres
, tcg_op1
);
10322 gen_neon_addl(size
, (opcode
== 2), tcg_passres
,
10323 tcg_passres
, tcg_op2_64
);
10326 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10327 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10330 gen_helper_neon_abdl_u16(tcg_passres
, tcg_op1
, tcg_op2
);
10332 gen_helper_neon_abdl_s16(tcg_passres
, tcg_op1
, tcg_op2
);
10336 gen_helper_neon_abdl_u32(tcg_passres
, tcg_op1
, tcg_op2
);
10338 gen_helper_neon_abdl_s32(tcg_passres
, tcg_op1
, tcg_op2
);
10342 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10343 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10344 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10347 gen_helper_neon_mull_u8(tcg_passres
, tcg_op1
, tcg_op2
);
10349 gen_helper_neon_mull_s8(tcg_passres
, tcg_op1
, tcg_op2
);
10353 gen_helper_neon_mull_u16(tcg_passres
, tcg_op1
, tcg_op2
);
10355 gen_helper_neon_mull_s16(tcg_passres
, tcg_op1
, tcg_op2
);
10359 case 9: /* SQDMLAL, SQDMLAL2 */
10360 case 11: /* SQDMLSL, SQDMLSL2 */
10361 case 13: /* SQDMULL, SQDMULL2 */
10363 gen_helper_neon_mull_s16(tcg_passres
, tcg_op1
, tcg_op2
);
10364 gen_helper_neon_addl_saturate_s32(tcg_passres
, cpu_env
,
10365 tcg_passres
, tcg_passres
);
10368 g_assert_not_reached();
10372 if (opcode
== 9 || opcode
== 11) {
10373 /* saturating accumulate ops */
10375 gen_helper_neon_negl_u32(tcg_passres
, tcg_passres
);
10377 gen_helper_neon_addl_saturate_s32(tcg_res
[pass
], cpu_env
,
10381 gen_neon_addl(size
, (accop
< 0), tcg_res
[pass
],
10382 tcg_res
[pass
], tcg_passres
);
10388 write_vec_element(s
, tcg_res
[0], rd
, 0, MO_64
);
10389 write_vec_element(s
, tcg_res
[1], rd
, 1, MO_64
);
10392 static void handle_3rd_wide(DisasContext
*s
, int is_q
, int is_u
, int size
,
10393 int opcode
, int rd
, int rn
, int rm
)
10395 TCGv_i64 tcg_res
[2];
10396 int part
= is_q
? 2 : 0;
10399 for (pass
= 0; pass
< 2; pass
++) {
10400 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
10401 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
10402 TCGv_i64 tcg_op2_wide
= tcg_temp_new_i64();
10403 static NeonGenWidenFn
* const widenfns
[3][2] = {
10404 { gen_helper_neon_widen_s8
, gen_helper_neon_widen_u8
},
10405 { gen_helper_neon_widen_s16
, gen_helper_neon_widen_u16
},
10406 { tcg_gen_ext_i32_i64
, tcg_gen_extu_i32_i64
},
10408 NeonGenWidenFn
*widenfn
= widenfns
[size
][is_u
];
10410 read_vec_element(s
, tcg_op1
, rn
, pass
, MO_64
);
10411 read_vec_element_i32(s
, tcg_op2
, rm
, part
+ pass
, MO_32
);
10412 widenfn(tcg_op2_wide
, tcg_op2
);
10413 tcg_res
[pass
] = tcg_temp_new_i64();
10414 gen_neon_addl(size
, (opcode
== 3),
10415 tcg_res
[pass
], tcg_op1
, tcg_op2_wide
);
10418 for (pass
= 0; pass
< 2; pass
++) {
10419 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
10423 static void do_narrow_round_high_u32(TCGv_i32 res
, TCGv_i64 in
)
10425 tcg_gen_addi_i64(in
, in
, 1U << 31);
10426 tcg_gen_extrh_i64_i32(res
, in
);
10429 static void handle_3rd_narrowing(DisasContext
*s
, int is_q
, int is_u
, int size
,
10430 int opcode
, int rd
, int rn
, int rm
)
10432 TCGv_i32 tcg_res
[2];
10433 int part
= is_q
? 2 : 0;
10436 for (pass
= 0; pass
< 2; pass
++) {
10437 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
10438 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
10439 TCGv_i64 tcg_wideres
= tcg_temp_new_i64();
10440 static NeonGenNarrowFn
* const narrowfns
[3][2] = {
10441 { gen_helper_neon_narrow_high_u8
,
10442 gen_helper_neon_narrow_round_high_u8
},
10443 { gen_helper_neon_narrow_high_u16
,
10444 gen_helper_neon_narrow_round_high_u16
},
10445 { tcg_gen_extrh_i64_i32
, do_narrow_round_high_u32
},
10447 NeonGenNarrowFn
*gennarrow
= narrowfns
[size
][is_u
];
10449 read_vec_element(s
, tcg_op1
, rn
, pass
, MO_64
);
10450 read_vec_element(s
, tcg_op2
, rm
, pass
, MO_64
);
10452 gen_neon_addl(size
, (opcode
== 6), tcg_wideres
, tcg_op1
, tcg_op2
);
10454 tcg_res
[pass
] = tcg_temp_new_i32();
10455 gennarrow(tcg_res
[pass
], tcg_wideres
);
10458 for (pass
= 0; pass
< 2; pass
++) {
10459 write_vec_element_i32(s
, tcg_res
[pass
], rd
, pass
+ part
, MO_32
);
10461 clear_vec_high(s
, is_q
, rd
);
10464 /* AdvSIMD three different
10465 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
10466 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10467 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
10468 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10470 static void disas_simd_three_reg_diff(DisasContext
*s
, uint32_t insn
)
10472 /* Instructions in this group fall into three basic classes
10473 * (in each case with the operation working on each element in
10474 * the input vectors):
10475 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10477 * (2) wide 64 x 128 -> 128
10478 * (3) narrowing 128 x 128 -> 64
10479 * Here we do initial decode, catch unallocated cases and
10480 * dispatch to separate functions for each class.
10482 int is_q
= extract32(insn
, 30, 1);
10483 int is_u
= extract32(insn
, 29, 1);
10484 int size
= extract32(insn
, 22, 2);
10485 int opcode
= extract32(insn
, 12, 4);
10486 int rm
= extract32(insn
, 16, 5);
10487 int rn
= extract32(insn
, 5, 5);
10488 int rd
= extract32(insn
, 0, 5);
10491 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10492 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10493 /* 64 x 128 -> 128 */
10495 unallocated_encoding(s
);
10498 if (!fp_access_check(s
)) {
10501 handle_3rd_wide(s
, is_q
, is_u
, size
, opcode
, rd
, rn
, rm
);
10503 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10504 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10505 /* 128 x 128 -> 64 */
10507 unallocated_encoding(s
);
10510 if (!fp_access_check(s
)) {
10513 handle_3rd_narrowing(s
, is_q
, is_u
, size
, opcode
, rd
, rn
, rm
);
10515 case 14: /* PMULL, PMULL2 */
10517 unallocated_encoding(s
);
10521 case 0: /* PMULL.P8 */
10522 if (!fp_access_check(s
)) {
10525 /* The Q field specifies lo/hi half input for this insn. */
10526 gen_gvec_op3_ool(s
, true, rd
, rn
, rm
, is_q
,
10527 gen_helper_neon_pmull_h
);
10530 case 3: /* PMULL.P64 */
10531 if (!dc_isar_feature(aa64_pmull
, s
)) {
10532 unallocated_encoding(s
);
10535 if (!fp_access_check(s
)) {
10538 /* The Q field specifies lo/hi half input for this insn. */
10539 gen_gvec_op3_ool(s
, true, rd
, rn
, rm
, is_q
,
10540 gen_helper_gvec_pmull_q
);
10544 unallocated_encoding(s
);
10548 case 9: /* SQDMLAL, SQDMLAL2 */
10549 case 11: /* SQDMLSL, SQDMLSL2 */
10550 case 13: /* SQDMULL, SQDMULL2 */
10551 if (is_u
|| size
== 0) {
10552 unallocated_encoding(s
);
10556 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10557 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10558 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10559 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10560 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10561 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10562 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10563 /* 64 x 64 -> 128 */
10565 unallocated_encoding(s
);
10568 if (!fp_access_check(s
)) {
10572 handle_3rd_widening(s
, is_q
, is_u
, size
, opcode
, rd
, rn
, rm
);
10575 /* opcode 15 not allocated */
10576 unallocated_encoding(s
);
10581 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10582 static void disas_simd_3same_logic(DisasContext
*s
, uint32_t insn
)
10584 int rd
= extract32(insn
, 0, 5);
10585 int rn
= extract32(insn
, 5, 5);
10586 int rm
= extract32(insn
, 16, 5);
10587 int size
= extract32(insn
, 22, 2);
10588 bool is_u
= extract32(insn
, 29, 1);
10589 bool is_q
= extract32(insn
, 30, 1);
10591 if (!fp_access_check(s
)) {
10595 switch (size
+ 4 * is_u
) {
10597 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_and
, 0);
10600 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_andc
, 0);
10603 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_or
, 0);
10606 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_orc
, 0);
10609 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_xor
, 0);
10612 case 5: /* BSL bitwise select */
10613 gen_gvec_fn4(s
, is_q
, rd
, rd
, rn
, rm
, tcg_gen_gvec_bitsel
, 0);
10615 case 6: /* BIT, bitwise insert if true */
10616 gen_gvec_fn4(s
, is_q
, rd
, rm
, rn
, rd
, tcg_gen_gvec_bitsel
, 0);
10618 case 7: /* BIF, bitwise insert if false */
10619 gen_gvec_fn4(s
, is_q
, rd
, rm
, rd
, rn
, tcg_gen_gvec_bitsel
, 0);
10623 g_assert_not_reached();
10627 /* Pairwise op subgroup of C3.6.16.
10629 * This is called directly or via the handle_3same_float for float pairwise
10630 * operations where the opcode and size are calculated differently.
10632 static void handle_simd_3same_pair(DisasContext
*s
, int is_q
, int u
, int opcode
,
10633 int size
, int rn
, int rm
, int rd
)
10638 /* Floating point operations need fpst */
10639 if (opcode
>= 0x58) {
10640 fpst
= fpstatus_ptr(FPST_FPCR
);
10645 if (!fp_access_check(s
)) {
10649 /* These operations work on the concatenated rm:rn, with each pair of
10650 * adjacent elements being operated on to produce an element in the result.
10653 TCGv_i64 tcg_res
[2];
10655 for (pass
= 0; pass
< 2; pass
++) {
10656 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
10657 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
10658 int passreg
= (pass
== 0) ? rn
: rm
;
10660 read_vec_element(s
, tcg_op1
, passreg
, 0, MO_64
);
10661 read_vec_element(s
, tcg_op2
, passreg
, 1, MO_64
);
10662 tcg_res
[pass
] = tcg_temp_new_i64();
10665 case 0x17: /* ADDP */
10666 tcg_gen_add_i64(tcg_res
[pass
], tcg_op1
, tcg_op2
);
10668 case 0x58: /* FMAXNMP */
10669 gen_helper_vfp_maxnumd(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10671 case 0x5a: /* FADDP */
10672 gen_helper_vfp_addd(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10674 case 0x5e: /* FMAXP */
10675 gen_helper_vfp_maxd(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10677 case 0x78: /* FMINNMP */
10678 gen_helper_vfp_minnumd(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10680 case 0x7e: /* FMINP */
10681 gen_helper_vfp_mind(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10684 g_assert_not_reached();
10688 for (pass
= 0; pass
< 2; pass
++) {
10689 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
10692 int maxpass
= is_q
? 4 : 2;
10693 TCGv_i32 tcg_res
[4];
10695 for (pass
= 0; pass
< maxpass
; pass
++) {
10696 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
10697 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
10698 NeonGenTwoOpFn
*genfn
= NULL
;
10699 int passreg
= pass
< (maxpass
/ 2) ? rn
: rm
;
10700 int passelt
= (is_q
&& (pass
& 1)) ? 2 : 0;
10702 read_vec_element_i32(s
, tcg_op1
, passreg
, passelt
, MO_32
);
10703 read_vec_element_i32(s
, tcg_op2
, passreg
, passelt
+ 1, MO_32
);
10704 tcg_res
[pass
] = tcg_temp_new_i32();
10707 case 0x17: /* ADDP */
10709 static NeonGenTwoOpFn
* const fns
[3] = {
10710 gen_helper_neon_padd_u8
,
10711 gen_helper_neon_padd_u16
,
10717 case 0x14: /* SMAXP, UMAXP */
10719 static NeonGenTwoOpFn
* const fns
[3][2] = {
10720 { gen_helper_neon_pmax_s8
, gen_helper_neon_pmax_u8
},
10721 { gen_helper_neon_pmax_s16
, gen_helper_neon_pmax_u16
},
10722 { tcg_gen_smax_i32
, tcg_gen_umax_i32
},
10724 genfn
= fns
[size
][u
];
10727 case 0x15: /* SMINP, UMINP */
10729 static NeonGenTwoOpFn
* const fns
[3][2] = {
10730 { gen_helper_neon_pmin_s8
, gen_helper_neon_pmin_u8
},
10731 { gen_helper_neon_pmin_s16
, gen_helper_neon_pmin_u16
},
10732 { tcg_gen_smin_i32
, tcg_gen_umin_i32
},
10734 genfn
= fns
[size
][u
];
10737 /* The FP operations are all on single floats (32 bit) */
10738 case 0x58: /* FMAXNMP */
10739 gen_helper_vfp_maxnums(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10741 case 0x5a: /* FADDP */
10742 gen_helper_vfp_adds(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10744 case 0x5e: /* FMAXP */
10745 gen_helper_vfp_maxs(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10747 case 0x78: /* FMINNMP */
10748 gen_helper_vfp_minnums(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10750 case 0x7e: /* FMINP */
10751 gen_helper_vfp_mins(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
10754 g_assert_not_reached();
10757 /* FP ops called directly, otherwise call now */
10759 genfn(tcg_res
[pass
], tcg_op1
, tcg_op2
);
10763 for (pass
= 0; pass
< maxpass
; pass
++) {
10764 write_vec_element_i32(s
, tcg_res
[pass
], rd
, pass
, MO_32
);
10766 clear_vec_high(s
, is_q
, rd
);
10770 /* Floating point op subgroup of C3.6.16. */
10771 static void disas_simd_3same_float(DisasContext
*s
, uint32_t insn
)
10773 /* For floating point ops, the U, size[1] and opcode bits
10774 * together indicate the operation. size[0] indicates single
10777 int fpopcode
= extract32(insn
, 11, 5)
10778 | (extract32(insn
, 23, 1) << 5)
10779 | (extract32(insn
, 29, 1) << 6);
10780 int is_q
= extract32(insn
, 30, 1);
10781 int size
= extract32(insn
, 22, 1);
10782 int rm
= extract32(insn
, 16, 5);
10783 int rn
= extract32(insn
, 5, 5);
10784 int rd
= extract32(insn
, 0, 5);
10786 int datasize
= is_q
? 128 : 64;
10787 int esize
= 32 << size
;
10788 int elements
= datasize
/ esize
;
10790 if (size
== 1 && !is_q
) {
10791 unallocated_encoding(s
);
10795 switch (fpopcode
) {
10796 case 0x58: /* FMAXNMP */
10797 case 0x5a: /* FADDP */
10798 case 0x5e: /* FMAXP */
10799 case 0x78: /* FMINNMP */
10800 case 0x7e: /* FMINP */
10801 if (size
&& !is_q
) {
10802 unallocated_encoding(s
);
10805 handle_simd_3same_pair(s
, is_q
, 0, fpopcode
, size
? MO_64
: MO_32
,
10808 case 0x1b: /* FMULX */
10809 case 0x1f: /* FRECPS */
10810 case 0x3f: /* FRSQRTS */
10811 case 0x5d: /* FACGE */
10812 case 0x7d: /* FACGT */
10813 case 0x19: /* FMLA */
10814 case 0x39: /* FMLS */
10815 case 0x18: /* FMAXNM */
10816 case 0x1a: /* FADD */
10817 case 0x1c: /* FCMEQ */
10818 case 0x1e: /* FMAX */
10819 case 0x38: /* FMINNM */
10820 case 0x3a: /* FSUB */
10821 case 0x3e: /* FMIN */
10822 case 0x5b: /* FMUL */
10823 case 0x5c: /* FCMGE */
10824 case 0x5f: /* FDIV */
10825 case 0x7a: /* FABD */
10826 case 0x7c: /* FCMGT */
10827 if (!fp_access_check(s
)) {
10830 handle_3same_float(s
, size
, elements
, fpopcode
, rd
, rn
, rm
);
10833 case 0x1d: /* FMLAL */
10834 case 0x3d: /* FMLSL */
10835 case 0x59: /* FMLAL2 */
10836 case 0x79: /* FMLSL2 */
10837 if (size
& 1 || !dc_isar_feature(aa64_fhm
, s
)) {
10838 unallocated_encoding(s
);
10841 if (fp_access_check(s
)) {
10842 int is_s
= extract32(insn
, 23, 1);
10843 int is_2
= extract32(insn
, 29, 1);
10844 int data
= (is_2
<< 1) | is_s
;
10845 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
10846 vec_full_reg_offset(s
, rn
),
10847 vec_full_reg_offset(s
, rm
), cpu_env
,
10848 is_q
? 16 : 8, vec_full_reg_size(s
),
10849 data
, gen_helper_gvec_fmlal_a64
);
10854 unallocated_encoding(s
);
10859 /* Integer op subgroup of C3.6.16. */
10860 static void disas_simd_3same_int(DisasContext
*s
, uint32_t insn
)
10862 int is_q
= extract32(insn
, 30, 1);
10863 int u
= extract32(insn
, 29, 1);
10864 int size
= extract32(insn
, 22, 2);
10865 int opcode
= extract32(insn
, 11, 5);
10866 int rm
= extract32(insn
, 16, 5);
10867 int rn
= extract32(insn
, 5, 5);
10868 int rd
= extract32(insn
, 0, 5);
10873 case 0x13: /* MUL, PMUL */
10874 if (u
&& size
!= 0) {
10875 unallocated_encoding(s
);
10879 case 0x0: /* SHADD, UHADD */
10880 case 0x2: /* SRHADD, URHADD */
10881 case 0x4: /* SHSUB, UHSUB */
10882 case 0xc: /* SMAX, UMAX */
10883 case 0xd: /* SMIN, UMIN */
10884 case 0xe: /* SABD, UABD */
10885 case 0xf: /* SABA, UABA */
10886 case 0x12: /* MLA, MLS */
10888 unallocated_encoding(s
);
10892 case 0x16: /* SQDMULH, SQRDMULH */
10893 if (size
== 0 || size
== 3) {
10894 unallocated_encoding(s
);
10899 if (size
== 3 && !is_q
) {
10900 unallocated_encoding(s
);
10906 if (!fp_access_check(s
)) {
10911 case 0x01: /* SQADD, UQADD */
10913 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_uqadd_qc
, size
);
10915 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sqadd_qc
, size
);
10918 case 0x05: /* SQSUB, UQSUB */
10920 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_uqsub_qc
, size
);
10922 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sqsub_qc
, size
);
10925 case 0x08: /* SSHL, USHL */
10927 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_ushl
, size
);
10929 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sshl
, size
);
10932 case 0x0c: /* SMAX, UMAX */
10934 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_umax
, size
);
10936 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_smax
, size
);
10939 case 0x0d: /* SMIN, UMIN */
10941 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_umin
, size
);
10943 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_smin
, size
);
10946 case 0xe: /* SABD, UABD */
10948 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_uabd
, size
);
10950 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sabd
, size
);
10953 case 0xf: /* SABA, UABA */
10955 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_uaba
, size
);
10957 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_saba
, size
);
10960 case 0x10: /* ADD, SUB */
10962 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_sub
, size
);
10964 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_add
, size
);
10967 case 0x13: /* MUL, PMUL */
10968 if (!u
) { /* MUL */
10969 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, tcg_gen_gvec_mul
, size
);
10970 } else { /* PMUL */
10971 gen_gvec_op3_ool(s
, is_q
, rd
, rn
, rm
, 0, gen_helper_gvec_pmul_b
);
10974 case 0x12: /* MLA, MLS */
10976 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_mls
, size
);
10978 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_mla
, size
);
10981 case 0x16: /* SQDMULH, SQRDMULH */
10983 static gen_helper_gvec_3_ptr
* const fns
[2][2] = {
10984 { gen_helper_neon_sqdmulh_h
, gen_helper_neon_sqrdmulh_h
},
10985 { gen_helper_neon_sqdmulh_s
, gen_helper_neon_sqrdmulh_s
},
10987 gen_gvec_op3_qc(s
, is_q
, rd
, rn
, rm
, fns
[size
- 1][u
]);
10991 if (!u
) { /* CMTST */
10992 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_cmtst
, size
);
10996 cond
= TCG_COND_EQ
;
10998 case 0x06: /* CMGT, CMHI */
10999 cond
= u
? TCG_COND_GTU
: TCG_COND_GT
;
11001 case 0x07: /* CMGE, CMHS */
11002 cond
= u
? TCG_COND_GEU
: TCG_COND_GE
;
11004 tcg_gen_gvec_cmp(cond
, size
, vec_full_reg_offset(s
, rd
),
11005 vec_full_reg_offset(s
, rn
),
11006 vec_full_reg_offset(s
, rm
),
11007 is_q
? 16 : 8, vec_full_reg_size(s
));
11013 for (pass
= 0; pass
< 2; pass
++) {
11014 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
11015 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
11016 TCGv_i64 tcg_res
= tcg_temp_new_i64();
11018 read_vec_element(s
, tcg_op1
, rn
, pass
, MO_64
);
11019 read_vec_element(s
, tcg_op2
, rm
, pass
, MO_64
);
11021 handle_3same_64(s
, opcode
, u
, tcg_res
, tcg_op1
, tcg_op2
);
11023 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
11026 for (pass
= 0; pass
< (is_q
? 4 : 2); pass
++) {
11027 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
11028 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
11029 TCGv_i32 tcg_res
= tcg_temp_new_i32();
11030 NeonGenTwoOpFn
*genfn
= NULL
;
11031 NeonGenTwoOpEnvFn
*genenvfn
= NULL
;
11033 read_vec_element_i32(s
, tcg_op1
, rn
, pass
, MO_32
);
11034 read_vec_element_i32(s
, tcg_op2
, rm
, pass
, MO_32
);
11037 case 0x0: /* SHADD, UHADD */
11039 static NeonGenTwoOpFn
* const fns
[3][2] = {
11040 { gen_helper_neon_hadd_s8
, gen_helper_neon_hadd_u8
},
11041 { gen_helper_neon_hadd_s16
, gen_helper_neon_hadd_u16
},
11042 { gen_helper_neon_hadd_s32
, gen_helper_neon_hadd_u32
},
11044 genfn
= fns
[size
][u
];
11047 case 0x2: /* SRHADD, URHADD */
11049 static NeonGenTwoOpFn
* const fns
[3][2] = {
11050 { gen_helper_neon_rhadd_s8
, gen_helper_neon_rhadd_u8
},
11051 { gen_helper_neon_rhadd_s16
, gen_helper_neon_rhadd_u16
},
11052 { gen_helper_neon_rhadd_s32
, gen_helper_neon_rhadd_u32
},
11054 genfn
= fns
[size
][u
];
11057 case 0x4: /* SHSUB, UHSUB */
11059 static NeonGenTwoOpFn
* const fns
[3][2] = {
11060 { gen_helper_neon_hsub_s8
, gen_helper_neon_hsub_u8
},
11061 { gen_helper_neon_hsub_s16
, gen_helper_neon_hsub_u16
},
11062 { gen_helper_neon_hsub_s32
, gen_helper_neon_hsub_u32
},
11064 genfn
= fns
[size
][u
];
11067 case 0x9: /* SQSHL, UQSHL */
11069 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
11070 { gen_helper_neon_qshl_s8
, gen_helper_neon_qshl_u8
},
11071 { gen_helper_neon_qshl_s16
, gen_helper_neon_qshl_u16
},
11072 { gen_helper_neon_qshl_s32
, gen_helper_neon_qshl_u32
},
11074 genenvfn
= fns
[size
][u
];
11077 case 0xa: /* SRSHL, URSHL */
11079 static NeonGenTwoOpFn
* const fns
[3][2] = {
11080 { gen_helper_neon_rshl_s8
, gen_helper_neon_rshl_u8
},
11081 { gen_helper_neon_rshl_s16
, gen_helper_neon_rshl_u16
},
11082 { gen_helper_neon_rshl_s32
, gen_helper_neon_rshl_u32
},
11084 genfn
= fns
[size
][u
];
11087 case 0xb: /* SQRSHL, UQRSHL */
11089 static NeonGenTwoOpEnvFn
* const fns
[3][2] = {
11090 { gen_helper_neon_qrshl_s8
, gen_helper_neon_qrshl_u8
},
11091 { gen_helper_neon_qrshl_s16
, gen_helper_neon_qrshl_u16
},
11092 { gen_helper_neon_qrshl_s32
, gen_helper_neon_qrshl_u32
},
11094 genenvfn
= fns
[size
][u
];
11098 g_assert_not_reached();
11102 genenvfn(tcg_res
, cpu_env
, tcg_op1
, tcg_op2
);
11104 genfn(tcg_res
, tcg_op1
, tcg_op2
);
11107 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
11110 clear_vec_high(s
, is_q
, rd
);
11113 /* AdvSIMD three same
11114 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
11115 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11116 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
11117 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11119 static void disas_simd_three_reg_same(DisasContext
*s
, uint32_t insn
)
11121 int opcode
= extract32(insn
, 11, 5);
11124 case 0x3: /* logic ops */
11125 disas_simd_3same_logic(s
, insn
);
11127 case 0x17: /* ADDP */
11128 case 0x14: /* SMAXP, UMAXP */
11129 case 0x15: /* SMINP, UMINP */
11131 /* Pairwise operations */
11132 int is_q
= extract32(insn
, 30, 1);
11133 int u
= extract32(insn
, 29, 1);
11134 int size
= extract32(insn
, 22, 2);
11135 int rm
= extract32(insn
, 16, 5);
11136 int rn
= extract32(insn
, 5, 5);
11137 int rd
= extract32(insn
, 0, 5);
11138 if (opcode
== 0x17) {
11139 if (u
|| (size
== 3 && !is_q
)) {
11140 unallocated_encoding(s
);
11145 unallocated_encoding(s
);
11149 handle_simd_3same_pair(s
, is_q
, u
, opcode
, size
, rn
, rm
, rd
);
11152 case 0x18 ... 0x31:
11153 /* floating point ops, sz[1] and U are part of opcode */
11154 disas_simd_3same_float(s
, insn
);
11157 disas_simd_3same_int(s
, insn
);
11163 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11165 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
11166 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11167 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
11168 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11170 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11171 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11174 static void disas_simd_three_reg_same_fp16(DisasContext
*s
, uint32_t insn
)
11176 int opcode
= extract32(insn
, 11, 3);
11177 int u
= extract32(insn
, 29, 1);
11178 int a
= extract32(insn
, 23, 1);
11179 int is_q
= extract32(insn
, 30, 1);
11180 int rm
= extract32(insn
, 16, 5);
11181 int rn
= extract32(insn
, 5, 5);
11182 int rd
= extract32(insn
, 0, 5);
11184 * For these floating point ops, the U, a and opcode bits
11185 * together indicate the operation.
11187 int fpopcode
= opcode
| (a
<< 3) | (u
<< 4);
11188 int datasize
= is_q
? 128 : 64;
11189 int elements
= datasize
/ 16;
11194 switch (fpopcode
) {
11195 case 0x0: /* FMAXNM */
11196 case 0x1: /* FMLA */
11197 case 0x2: /* FADD */
11198 case 0x3: /* FMULX */
11199 case 0x4: /* FCMEQ */
11200 case 0x6: /* FMAX */
11201 case 0x7: /* FRECPS */
11202 case 0x8: /* FMINNM */
11203 case 0x9: /* FMLS */
11204 case 0xa: /* FSUB */
11205 case 0xe: /* FMIN */
11206 case 0xf: /* FRSQRTS */
11207 case 0x13: /* FMUL */
11208 case 0x14: /* FCMGE */
11209 case 0x15: /* FACGE */
11210 case 0x17: /* FDIV */
11211 case 0x1a: /* FABD */
11212 case 0x1c: /* FCMGT */
11213 case 0x1d: /* FACGT */
11216 case 0x10: /* FMAXNMP */
11217 case 0x12: /* FADDP */
11218 case 0x16: /* FMAXP */
11219 case 0x18: /* FMINNMP */
11220 case 0x1e: /* FMINP */
11224 unallocated_encoding(s
);
11228 if (!dc_isar_feature(aa64_fp16
, s
)) {
11229 unallocated_encoding(s
);
11233 if (!fp_access_check(s
)) {
11237 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
11240 int maxpass
= is_q
? 8 : 4;
11241 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
11242 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
11243 TCGv_i32 tcg_res
[8];
11245 for (pass
= 0; pass
< maxpass
; pass
++) {
11246 int passreg
= pass
< (maxpass
/ 2) ? rn
: rm
;
11247 int passelt
= (pass
<< 1) & (maxpass
- 1);
11249 read_vec_element_i32(s
, tcg_op1
, passreg
, passelt
, MO_16
);
11250 read_vec_element_i32(s
, tcg_op2
, passreg
, passelt
+ 1, MO_16
);
11251 tcg_res
[pass
] = tcg_temp_new_i32();
11253 switch (fpopcode
) {
11254 case 0x10: /* FMAXNMP */
11255 gen_helper_advsimd_maxnumh(tcg_res
[pass
], tcg_op1
, tcg_op2
,
11258 case 0x12: /* FADDP */
11259 gen_helper_advsimd_addh(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
11261 case 0x16: /* FMAXP */
11262 gen_helper_advsimd_maxh(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
11264 case 0x18: /* FMINNMP */
11265 gen_helper_advsimd_minnumh(tcg_res
[pass
], tcg_op1
, tcg_op2
,
11268 case 0x1e: /* FMINP */
11269 gen_helper_advsimd_minh(tcg_res
[pass
], tcg_op1
, tcg_op2
, fpst
);
11272 g_assert_not_reached();
11276 for (pass
= 0; pass
< maxpass
; pass
++) {
11277 write_vec_element_i32(s
, tcg_res
[pass
], rd
, pass
, MO_16
);
11280 for (pass
= 0; pass
< elements
; pass
++) {
11281 TCGv_i32 tcg_op1
= tcg_temp_new_i32();
11282 TCGv_i32 tcg_op2
= tcg_temp_new_i32();
11283 TCGv_i32 tcg_res
= tcg_temp_new_i32();
11285 read_vec_element_i32(s
, tcg_op1
, rn
, pass
, MO_16
);
11286 read_vec_element_i32(s
, tcg_op2
, rm
, pass
, MO_16
);
11288 switch (fpopcode
) {
11289 case 0x0: /* FMAXNM */
11290 gen_helper_advsimd_maxnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11292 case 0x1: /* FMLA */
11293 read_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_16
);
11294 gen_helper_advsimd_muladdh(tcg_res
, tcg_op1
, tcg_op2
, tcg_res
,
11297 case 0x2: /* FADD */
11298 gen_helper_advsimd_addh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11300 case 0x3: /* FMULX */
11301 gen_helper_advsimd_mulxh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11303 case 0x4: /* FCMEQ */
11304 gen_helper_advsimd_ceq_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11306 case 0x6: /* FMAX */
11307 gen_helper_advsimd_maxh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11309 case 0x7: /* FRECPS */
11310 gen_helper_recpsf_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11312 case 0x8: /* FMINNM */
11313 gen_helper_advsimd_minnumh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11315 case 0x9: /* FMLS */
11316 /* As usual for ARM, separate negation for fused multiply-add */
11317 tcg_gen_xori_i32(tcg_op1
, tcg_op1
, 0x8000);
11318 read_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_16
);
11319 gen_helper_advsimd_muladdh(tcg_res
, tcg_op1
, tcg_op2
, tcg_res
,
11322 case 0xa: /* FSUB */
11323 gen_helper_advsimd_subh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11325 case 0xe: /* FMIN */
11326 gen_helper_advsimd_minh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11328 case 0xf: /* FRSQRTS */
11329 gen_helper_rsqrtsf_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11331 case 0x13: /* FMUL */
11332 gen_helper_advsimd_mulh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11334 case 0x14: /* FCMGE */
11335 gen_helper_advsimd_cge_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11337 case 0x15: /* FACGE */
11338 gen_helper_advsimd_acge_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11340 case 0x17: /* FDIV */
11341 gen_helper_advsimd_divh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11343 case 0x1a: /* FABD */
11344 gen_helper_advsimd_subh(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11345 tcg_gen_andi_i32(tcg_res
, tcg_res
, 0x7fff);
11347 case 0x1c: /* FCMGT */
11348 gen_helper_advsimd_cgt_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11350 case 0x1d: /* FACGT */
11351 gen_helper_advsimd_acgt_f16(tcg_res
, tcg_op1
, tcg_op2
, fpst
);
11354 g_assert_not_reached();
11357 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_16
);
11361 clear_vec_high(s
, is_q
, rd
);
11364 /* AdvSIMD three same extra
11365 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
11366 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11367 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
11368 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11370 static void disas_simd_three_reg_same_extra(DisasContext
*s
, uint32_t insn
)
11372 int rd
= extract32(insn
, 0, 5);
11373 int rn
= extract32(insn
, 5, 5);
11374 int opcode
= extract32(insn
, 11, 4);
11375 int rm
= extract32(insn
, 16, 5);
11376 int size
= extract32(insn
, 22, 2);
11377 bool u
= extract32(insn
, 29, 1);
11378 bool is_q
= extract32(insn
, 30, 1);
11382 switch (u
* 16 + opcode
) {
11383 case 0x10: /* SQRDMLAH (vector) */
11384 case 0x11: /* SQRDMLSH (vector) */
11385 if (size
!= 1 && size
!= 2) {
11386 unallocated_encoding(s
);
11389 feature
= dc_isar_feature(aa64_rdm
, s
);
11391 case 0x02: /* SDOT (vector) */
11392 case 0x12: /* UDOT (vector) */
11393 if (size
!= MO_32
) {
11394 unallocated_encoding(s
);
11397 feature
= dc_isar_feature(aa64_dp
, s
);
11399 case 0x03: /* USDOT */
11400 if (size
!= MO_32
) {
11401 unallocated_encoding(s
);
11404 feature
= dc_isar_feature(aa64_i8mm
, s
);
11406 case 0x04: /* SMMLA */
11407 case 0x14: /* UMMLA */
11408 case 0x05: /* USMMLA */
11409 if (!is_q
|| size
!= MO_32
) {
11410 unallocated_encoding(s
);
11413 feature
= dc_isar_feature(aa64_i8mm
, s
);
11415 case 0x18: /* FCMLA, #0 */
11416 case 0x19: /* FCMLA, #90 */
11417 case 0x1a: /* FCMLA, #180 */
11418 case 0x1b: /* FCMLA, #270 */
11419 case 0x1c: /* FCADD, #90 */
11420 case 0x1e: /* FCADD, #270 */
11422 || (size
== 1 && !dc_isar_feature(aa64_fp16
, s
))
11423 || (size
== 3 && !is_q
)) {
11424 unallocated_encoding(s
);
11427 feature
= dc_isar_feature(aa64_fcma
, s
);
11429 case 0x1d: /* BFMMLA */
11430 if (size
!= MO_16
|| !is_q
) {
11431 unallocated_encoding(s
);
11434 feature
= dc_isar_feature(aa64_bf16
, s
);
11438 case 1: /* BFDOT */
11439 case 3: /* BFMLAL{B,T} */
11440 feature
= dc_isar_feature(aa64_bf16
, s
);
11443 unallocated_encoding(s
);
11448 unallocated_encoding(s
);
11452 unallocated_encoding(s
);
11455 if (!fp_access_check(s
)) {
11460 case 0x0: /* SQRDMLAH (vector) */
11461 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sqrdmlah_qc
, size
);
11464 case 0x1: /* SQRDMLSH (vector) */
11465 gen_gvec_fn3(s
, is_q
, rd
, rn
, rm
, gen_gvec_sqrdmlsh_qc
, size
);
11468 case 0x2: /* SDOT / UDOT */
11469 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, 0,
11470 u
? gen_helper_gvec_udot_b
: gen_helper_gvec_sdot_b
);
11473 case 0x3: /* USDOT */
11474 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, 0, gen_helper_gvec_usdot_b
);
11477 case 0x04: /* SMMLA, UMMLA */
11478 gen_gvec_op4_ool(s
, 1, rd
, rn
, rm
, rd
, 0,
11479 u
? gen_helper_gvec_ummla_b
11480 : gen_helper_gvec_smmla_b
);
11482 case 0x05: /* USMMLA */
11483 gen_gvec_op4_ool(s
, 1, rd
, rn
, rm
, rd
, 0, gen_helper_gvec_usmmla_b
);
11486 case 0x8: /* FCMLA, #0 */
11487 case 0x9: /* FCMLA, #90 */
11488 case 0xa: /* FCMLA, #180 */
11489 case 0xb: /* FCMLA, #270 */
11490 rot
= extract32(opcode
, 0, 2);
11493 gen_gvec_op4_fpst(s
, is_q
, rd
, rn
, rm
, rd
, true, rot
,
11494 gen_helper_gvec_fcmlah
);
11497 gen_gvec_op4_fpst(s
, is_q
, rd
, rn
, rm
, rd
, false, rot
,
11498 gen_helper_gvec_fcmlas
);
11501 gen_gvec_op4_fpst(s
, is_q
, rd
, rn
, rm
, rd
, false, rot
,
11502 gen_helper_gvec_fcmlad
);
11505 g_assert_not_reached();
11509 case 0xc: /* FCADD, #90 */
11510 case 0xe: /* FCADD, #270 */
11511 rot
= extract32(opcode
, 1, 1);
11514 gen_gvec_op3_fpst(s
, is_q
, rd
, rn
, rm
, size
== 1, rot
,
11515 gen_helper_gvec_fcaddh
);
11518 gen_gvec_op3_fpst(s
, is_q
, rd
, rn
, rm
, size
== 1, rot
,
11519 gen_helper_gvec_fcadds
);
11522 gen_gvec_op3_fpst(s
, is_q
, rd
, rn
, rm
, size
== 1, rot
,
11523 gen_helper_gvec_fcaddd
);
11526 g_assert_not_reached();
11530 case 0xd: /* BFMMLA */
11531 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, 0, gen_helper_gvec_bfmmla
);
11535 case 1: /* BFDOT */
11536 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, 0, gen_helper_gvec_bfdot
);
11538 case 3: /* BFMLAL{B,T} */
11539 gen_gvec_op4_fpst(s
, 1, rd
, rn
, rm
, rd
, false, is_q
,
11540 gen_helper_gvec_bfmlal
);
11543 g_assert_not_reached();
11548 g_assert_not_reached();
11552 static void handle_2misc_widening(DisasContext
*s
, int opcode
, bool is_q
,
11553 int size
, int rn
, int rd
)
11555 /* Handle 2-reg-misc ops which are widening (so each size element
11556 * in the source becomes a 2*size element in the destination.
11557 * The only instruction like this is FCVTL.
11562 /* 32 -> 64 bit fp conversion */
11563 TCGv_i64 tcg_res
[2];
11564 int srcelt
= is_q
? 2 : 0;
11566 for (pass
= 0; pass
< 2; pass
++) {
11567 TCGv_i32 tcg_op
= tcg_temp_new_i32();
11568 tcg_res
[pass
] = tcg_temp_new_i64();
11570 read_vec_element_i32(s
, tcg_op
, rn
, srcelt
+ pass
, MO_32
);
11571 gen_helper_vfp_fcvtds(tcg_res
[pass
], tcg_op
, cpu_env
);
11573 for (pass
= 0; pass
< 2; pass
++) {
11574 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
11577 /* 16 -> 32 bit fp conversion */
11578 int srcelt
= is_q
? 4 : 0;
11579 TCGv_i32 tcg_res
[4];
11580 TCGv_ptr fpst
= fpstatus_ptr(FPST_FPCR
);
11581 TCGv_i32 ahp
= get_ahp_flag();
11583 for (pass
= 0; pass
< 4; pass
++) {
11584 tcg_res
[pass
] = tcg_temp_new_i32();
11586 read_vec_element_i32(s
, tcg_res
[pass
], rn
, srcelt
+ pass
, MO_16
);
11587 gen_helper_vfp_fcvt_f16_to_f32(tcg_res
[pass
], tcg_res
[pass
],
11590 for (pass
= 0; pass
< 4; pass
++) {
11591 write_vec_element_i32(s
, tcg_res
[pass
], rd
, pass
, MO_32
);
11596 static void handle_rev(DisasContext
*s
, int opcode
, bool u
,
11597 bool is_q
, int size
, int rn
, int rd
)
11599 int op
= (opcode
<< 1) | u
;
11600 int opsz
= op
+ size
;
11601 int grp_size
= 3 - opsz
;
11602 int dsize
= is_q
? 128 : 64;
11606 unallocated_encoding(s
);
11610 if (!fp_access_check(s
)) {
11615 /* Special case bytes, use bswap op on each group of elements */
11616 int groups
= dsize
/ (8 << grp_size
);
11618 for (i
= 0; i
< groups
; i
++) {
11619 TCGv_i64 tcg_tmp
= tcg_temp_new_i64();
11621 read_vec_element(s
, tcg_tmp
, rn
, i
, grp_size
);
11622 switch (grp_size
) {
11624 tcg_gen_bswap16_i64(tcg_tmp
, tcg_tmp
, TCG_BSWAP_IZ
);
11627 tcg_gen_bswap32_i64(tcg_tmp
, tcg_tmp
, TCG_BSWAP_IZ
);
11630 tcg_gen_bswap64_i64(tcg_tmp
, tcg_tmp
);
11633 g_assert_not_reached();
11635 write_vec_element(s
, tcg_tmp
, rd
, i
, grp_size
);
11637 clear_vec_high(s
, is_q
, rd
);
11639 int revmask
= (1 << grp_size
) - 1;
11640 int esize
= 8 << size
;
11641 int elements
= dsize
/ esize
;
11642 TCGv_i64 tcg_rn
= tcg_temp_new_i64();
11643 TCGv_i64 tcg_rd
[2];
11645 for (i
= 0; i
< 2; i
++) {
11646 tcg_rd
[i
] = tcg_temp_new_i64();
11647 tcg_gen_movi_i64(tcg_rd
[i
], 0);
11650 for (i
= 0; i
< elements
; i
++) {
11651 int e_rev
= (i
& 0xf) ^ revmask
;
11652 int w
= (e_rev
* esize
) / 64;
11653 int o
= (e_rev
* esize
) % 64;
11655 read_vec_element(s
, tcg_rn
, rn
, i
, size
);
11656 tcg_gen_deposit_i64(tcg_rd
[w
], tcg_rd
[w
], tcg_rn
, o
, esize
);
11659 for (i
= 0; i
< 2; i
++) {
11660 write_vec_element(s
, tcg_rd
[i
], rd
, i
, MO_64
);
11662 clear_vec_high(s
, true, rd
);
11666 static void handle_2misc_pairwise(DisasContext
*s
, int opcode
, bool u
,
11667 bool is_q
, int size
, int rn
, int rd
)
11669 /* Implement the pairwise operations from 2-misc:
11670 * SADDLP, UADDLP, SADALP, UADALP.
11671 * These all add pairs of elements in the input to produce a
11672 * double-width result element in the output (possibly accumulating).
11674 bool accum
= (opcode
== 0x6);
11675 int maxpass
= is_q
? 2 : 1;
11677 TCGv_i64 tcg_res
[2];
11680 /* 32 + 32 -> 64 op */
11681 MemOp memop
= size
+ (u
? 0 : MO_SIGN
);
11683 for (pass
= 0; pass
< maxpass
; pass
++) {
11684 TCGv_i64 tcg_op1
= tcg_temp_new_i64();
11685 TCGv_i64 tcg_op2
= tcg_temp_new_i64();
11687 tcg_res
[pass
] = tcg_temp_new_i64();
11689 read_vec_element(s
, tcg_op1
, rn
, pass
* 2, memop
);
11690 read_vec_element(s
, tcg_op2
, rn
, pass
* 2 + 1, memop
);
11691 tcg_gen_add_i64(tcg_res
[pass
], tcg_op1
, tcg_op2
);
11693 read_vec_element(s
, tcg_op1
, rd
, pass
, MO_64
);
11694 tcg_gen_add_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_op1
);
11698 for (pass
= 0; pass
< maxpass
; pass
++) {
11699 TCGv_i64 tcg_op
= tcg_temp_new_i64();
11700 NeonGenOne64OpFn
*genfn
;
11701 static NeonGenOne64OpFn
* const fns
[2][2] = {
11702 { gen_helper_neon_addlp_s8
, gen_helper_neon_addlp_u8
},
11703 { gen_helper_neon_addlp_s16
, gen_helper_neon_addlp_u16
},
11706 genfn
= fns
[size
][u
];
11708 tcg_res
[pass
] = tcg_temp_new_i64();
11710 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
11711 genfn(tcg_res
[pass
], tcg_op
);
11714 read_vec_element(s
, tcg_op
, rd
, pass
, MO_64
);
11716 gen_helper_neon_addl_u16(tcg_res
[pass
],
11717 tcg_res
[pass
], tcg_op
);
11719 gen_helper_neon_addl_u32(tcg_res
[pass
],
11720 tcg_res
[pass
], tcg_op
);
11726 tcg_res
[1] = tcg_constant_i64(0);
11728 for (pass
= 0; pass
< 2; pass
++) {
11729 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
11733 static void handle_shll(DisasContext
*s
, bool is_q
, int size
, int rn
, int rd
)
11735 /* Implement SHLL and SHLL2 */
11737 int part
= is_q
? 2 : 0;
11738 TCGv_i64 tcg_res
[2];
11740 for (pass
= 0; pass
< 2; pass
++) {
11741 static NeonGenWidenFn
* const widenfns
[3] = {
11742 gen_helper_neon_widen_u8
,
11743 gen_helper_neon_widen_u16
,
11744 tcg_gen_extu_i32_i64
,
11746 NeonGenWidenFn
*widenfn
= widenfns
[size
];
11747 TCGv_i32 tcg_op
= tcg_temp_new_i32();
11749 read_vec_element_i32(s
, tcg_op
, rn
, part
+ pass
, MO_32
);
11750 tcg_res
[pass
] = tcg_temp_new_i64();
11751 widenfn(tcg_res
[pass
], tcg_op
);
11752 tcg_gen_shli_i64(tcg_res
[pass
], tcg_res
[pass
], 8 << size
);
11755 for (pass
= 0; pass
< 2; pass
++) {
11756 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
11760 /* AdvSIMD two reg misc
11761 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
11762 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11763 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
11764 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11766 static void disas_simd_two_reg_misc(DisasContext
*s
, uint32_t insn
)
11768 int size
= extract32(insn
, 22, 2);
11769 int opcode
= extract32(insn
, 12, 5);
11770 bool u
= extract32(insn
, 29, 1);
11771 bool is_q
= extract32(insn
, 30, 1);
11772 int rn
= extract32(insn
, 5, 5);
11773 int rd
= extract32(insn
, 0, 5);
11774 bool need_fpstatus
= false;
11776 TCGv_i32 tcg_rmode
;
11777 TCGv_ptr tcg_fpstatus
;
11780 case 0x0: /* REV64, REV32 */
11781 case 0x1: /* REV16 */
11782 handle_rev(s
, opcode
, u
, is_q
, size
, rn
, rd
);
11784 case 0x5: /* CNT, NOT, RBIT */
11785 if (u
&& size
== 0) {
11788 } else if (u
&& size
== 1) {
11791 } else if (!u
&& size
== 0) {
11795 unallocated_encoding(s
);
11797 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11798 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11800 unallocated_encoding(s
);
11803 if (!fp_access_check(s
)) {
11807 handle_2misc_narrow(s
, false, opcode
, u
, is_q
, size
, rn
, rd
);
11809 case 0x4: /* CLS, CLZ */
11811 unallocated_encoding(s
);
11815 case 0x2: /* SADDLP, UADDLP */
11816 case 0x6: /* SADALP, UADALP */
11818 unallocated_encoding(s
);
11821 if (!fp_access_check(s
)) {
11824 handle_2misc_pairwise(s
, opcode
, u
, is_q
, size
, rn
, rd
);
11826 case 0x13: /* SHLL, SHLL2 */
11827 if (u
== 0 || size
== 3) {
11828 unallocated_encoding(s
);
11831 if (!fp_access_check(s
)) {
11834 handle_shll(s
, is_q
, size
, rn
, rd
);
11836 case 0xa: /* CMLT */
11838 unallocated_encoding(s
);
11842 case 0x8: /* CMGT, CMGE */
11843 case 0x9: /* CMEQ, CMLE */
11844 case 0xb: /* ABS, NEG */
11845 if (size
== 3 && !is_q
) {
11846 unallocated_encoding(s
);
11850 case 0x3: /* SUQADD, USQADD */
11851 if (size
== 3 && !is_q
) {
11852 unallocated_encoding(s
);
11855 if (!fp_access_check(s
)) {
11858 handle_2misc_satacc(s
, false, u
, is_q
, size
, rn
, rd
);
11860 case 0x7: /* SQABS, SQNEG */
11861 if (size
== 3 && !is_q
) {
11862 unallocated_encoding(s
);
11867 case 0x16 ... 0x1f:
11869 /* Floating point: U, size[1] and opcode indicate operation;
11870 * size[0] indicates single or double precision.
11872 int is_double
= extract32(size
, 0, 1);
11873 opcode
|= (extract32(size
, 1, 1) << 5) | (u
<< 6);
11874 size
= is_double
? 3 : 2;
11876 case 0x2f: /* FABS */
11877 case 0x6f: /* FNEG */
11878 if (size
== 3 && !is_q
) {
11879 unallocated_encoding(s
);
11883 case 0x1d: /* SCVTF */
11884 case 0x5d: /* UCVTF */
11886 bool is_signed
= (opcode
== 0x1d) ? true : false;
11887 int elements
= is_double
? 2 : is_q
? 4 : 2;
11888 if (is_double
&& !is_q
) {
11889 unallocated_encoding(s
);
11892 if (!fp_access_check(s
)) {
11895 handle_simd_intfp_conv(s
, rd
, rn
, elements
, is_signed
, 0, size
);
11898 case 0x2c: /* FCMGT (zero) */
11899 case 0x2d: /* FCMEQ (zero) */
11900 case 0x2e: /* FCMLT (zero) */
11901 case 0x6c: /* FCMGE (zero) */
11902 case 0x6d: /* FCMLE (zero) */
11903 if (size
== 3 && !is_q
) {
11904 unallocated_encoding(s
);
11907 handle_2misc_fcmp_zero(s
, opcode
, false, u
, is_q
, size
, rn
, rd
);
11909 case 0x7f: /* FSQRT */
11910 if (size
== 3 && !is_q
) {
11911 unallocated_encoding(s
);
11915 case 0x1a: /* FCVTNS */
11916 case 0x1b: /* FCVTMS */
11917 case 0x3a: /* FCVTPS */
11918 case 0x3b: /* FCVTZS */
11919 case 0x5a: /* FCVTNU */
11920 case 0x5b: /* FCVTMU */
11921 case 0x7a: /* FCVTPU */
11922 case 0x7b: /* FCVTZU */
11923 need_fpstatus
= true;
11924 rmode
= extract32(opcode
, 5, 1) | (extract32(opcode
, 0, 1) << 1);
11925 if (size
== 3 && !is_q
) {
11926 unallocated_encoding(s
);
11930 case 0x5c: /* FCVTAU */
11931 case 0x1c: /* FCVTAS */
11932 need_fpstatus
= true;
11933 rmode
= FPROUNDING_TIEAWAY
;
11934 if (size
== 3 && !is_q
) {
11935 unallocated_encoding(s
);
11939 case 0x3c: /* URECPE */
11941 unallocated_encoding(s
);
11945 case 0x3d: /* FRECPE */
11946 case 0x7d: /* FRSQRTE */
11947 if (size
== 3 && !is_q
) {
11948 unallocated_encoding(s
);
11951 if (!fp_access_check(s
)) {
11954 handle_2misc_reciprocal(s
, opcode
, false, u
, is_q
, size
, rn
, rd
);
11956 case 0x56: /* FCVTXN, FCVTXN2 */
11958 unallocated_encoding(s
);
11962 case 0x16: /* FCVTN, FCVTN2 */
11963 /* handle_2misc_narrow does a 2*size -> size operation, but these
11964 * instructions encode the source size rather than dest size.
11966 if (!fp_access_check(s
)) {
11969 handle_2misc_narrow(s
, false, opcode
, 0, is_q
, size
- 1, rn
, rd
);
11971 case 0x36: /* BFCVTN, BFCVTN2 */
11972 if (!dc_isar_feature(aa64_bf16
, s
) || size
!= 2) {
11973 unallocated_encoding(s
);
11976 if (!fp_access_check(s
)) {
11979 handle_2misc_narrow(s
, false, opcode
, 0, is_q
, size
- 1, rn
, rd
);
11981 case 0x17: /* FCVTL, FCVTL2 */
11982 if (!fp_access_check(s
)) {
11985 handle_2misc_widening(s
, opcode
, is_q
, size
, rn
, rd
);
11987 case 0x18: /* FRINTN */
11988 case 0x19: /* FRINTM */
11989 case 0x38: /* FRINTP */
11990 case 0x39: /* FRINTZ */
11991 rmode
= extract32(opcode
, 5, 1) | (extract32(opcode
, 0, 1) << 1);
11993 case 0x59: /* FRINTX */
11994 case 0x79: /* FRINTI */
11995 need_fpstatus
= true;
11996 if (size
== 3 && !is_q
) {
11997 unallocated_encoding(s
);
12001 case 0x58: /* FRINTA */
12002 rmode
= FPROUNDING_TIEAWAY
;
12003 need_fpstatus
= true;
12004 if (size
== 3 && !is_q
) {
12005 unallocated_encoding(s
);
12009 case 0x7c: /* URSQRTE */
12011 unallocated_encoding(s
);
12015 case 0x1e: /* FRINT32Z */
12016 case 0x1f: /* FRINT64Z */
12017 rmode
= FPROUNDING_ZERO
;
12019 case 0x5e: /* FRINT32X */
12020 case 0x5f: /* FRINT64X */
12021 need_fpstatus
= true;
12022 if ((size
== 3 && !is_q
) || !dc_isar_feature(aa64_frint
, s
)) {
12023 unallocated_encoding(s
);
12028 unallocated_encoding(s
);
12034 unallocated_encoding(s
);
12038 if (!fp_access_check(s
)) {
12042 if (need_fpstatus
|| rmode
>= 0) {
12043 tcg_fpstatus
= fpstatus_ptr(FPST_FPCR
);
12045 tcg_fpstatus
= NULL
;
12048 tcg_rmode
= gen_set_rmode(rmode
, tcg_fpstatus
);
12055 if (u
&& size
== 0) { /* NOT */
12056 gen_gvec_fn2(s
, is_q
, rd
, rn
, tcg_gen_gvec_not
, 0);
12060 case 0x8: /* CMGT, CMGE */
12062 gen_gvec_fn2(s
, is_q
, rd
, rn
, gen_gvec_cge0
, size
);
12064 gen_gvec_fn2(s
, is_q
, rd
, rn
, gen_gvec_cgt0
, size
);
12067 case 0x9: /* CMEQ, CMLE */
12069 gen_gvec_fn2(s
, is_q
, rd
, rn
, gen_gvec_cle0
, size
);
12071 gen_gvec_fn2(s
, is_q
, rd
, rn
, gen_gvec_ceq0
, size
);
12074 case 0xa: /* CMLT */
12075 gen_gvec_fn2(s
, is_q
, rd
, rn
, gen_gvec_clt0
, size
);
12078 if (u
) { /* ABS, NEG */
12079 gen_gvec_fn2(s
, is_q
, rd
, rn
, tcg_gen_gvec_neg
, size
);
12081 gen_gvec_fn2(s
, is_q
, rd
, rn
, tcg_gen_gvec_abs
, size
);
12087 /* All 64-bit element operations can be shared with scalar 2misc */
12090 /* Coverity claims (size == 3 && !is_q) has been eliminated
12091 * from all paths leading to here.
12093 tcg_debug_assert(is_q
);
12094 for (pass
= 0; pass
< 2; pass
++) {
12095 TCGv_i64 tcg_op
= tcg_temp_new_i64();
12096 TCGv_i64 tcg_res
= tcg_temp_new_i64();
12098 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
12100 handle_2misc_64(s
, opcode
, u
, tcg_res
, tcg_op
,
12101 tcg_rmode
, tcg_fpstatus
);
12103 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
12108 for (pass
= 0; pass
< (is_q
? 4 : 2); pass
++) {
12109 TCGv_i32 tcg_op
= tcg_temp_new_i32();
12110 TCGv_i32 tcg_res
= tcg_temp_new_i32();
12112 read_vec_element_i32(s
, tcg_op
, rn
, pass
, MO_32
);
12115 /* Special cases for 32 bit elements */
12117 case 0x4: /* CLS */
12119 tcg_gen_clzi_i32(tcg_res
, tcg_op
, 32);
12121 tcg_gen_clrsb_i32(tcg_res
, tcg_op
);
12124 case 0x7: /* SQABS, SQNEG */
12126 gen_helper_neon_qneg_s32(tcg_res
, cpu_env
, tcg_op
);
12128 gen_helper_neon_qabs_s32(tcg_res
, cpu_env
, tcg_op
);
12131 case 0x2f: /* FABS */
12132 gen_helper_vfp_abss(tcg_res
, tcg_op
);
12134 case 0x6f: /* FNEG */
12135 gen_helper_vfp_negs(tcg_res
, tcg_op
);
12137 case 0x7f: /* FSQRT */
12138 gen_helper_vfp_sqrts(tcg_res
, tcg_op
, cpu_env
);
12140 case 0x1a: /* FCVTNS */
12141 case 0x1b: /* FCVTMS */
12142 case 0x1c: /* FCVTAS */
12143 case 0x3a: /* FCVTPS */
12144 case 0x3b: /* FCVTZS */
12145 gen_helper_vfp_tosls(tcg_res
, tcg_op
,
12146 tcg_constant_i32(0), tcg_fpstatus
);
12148 case 0x5a: /* FCVTNU */
12149 case 0x5b: /* FCVTMU */
12150 case 0x5c: /* FCVTAU */
12151 case 0x7a: /* FCVTPU */
12152 case 0x7b: /* FCVTZU */
12153 gen_helper_vfp_touls(tcg_res
, tcg_op
,
12154 tcg_constant_i32(0), tcg_fpstatus
);
12156 case 0x18: /* FRINTN */
12157 case 0x19: /* FRINTM */
12158 case 0x38: /* FRINTP */
12159 case 0x39: /* FRINTZ */
12160 case 0x58: /* FRINTA */
12161 case 0x79: /* FRINTI */
12162 gen_helper_rints(tcg_res
, tcg_op
, tcg_fpstatus
);
12164 case 0x59: /* FRINTX */
12165 gen_helper_rints_exact(tcg_res
, tcg_op
, tcg_fpstatus
);
12167 case 0x7c: /* URSQRTE */
12168 gen_helper_rsqrte_u32(tcg_res
, tcg_op
);
12170 case 0x1e: /* FRINT32Z */
12171 case 0x5e: /* FRINT32X */
12172 gen_helper_frint32_s(tcg_res
, tcg_op
, tcg_fpstatus
);
12174 case 0x1f: /* FRINT64Z */
12175 case 0x5f: /* FRINT64X */
12176 gen_helper_frint64_s(tcg_res
, tcg_op
, tcg_fpstatus
);
12179 g_assert_not_reached();
12182 /* Use helpers for 8 and 16 bit elements */
12184 case 0x5: /* CNT, RBIT */
12185 /* For these two insns size is part of the opcode specifier
12186 * (handled earlier); they always operate on byte elements.
12189 gen_helper_neon_rbit_u8(tcg_res
, tcg_op
);
12191 gen_helper_neon_cnt_u8(tcg_res
, tcg_op
);
12194 case 0x7: /* SQABS, SQNEG */
12196 NeonGenOneOpEnvFn
*genfn
;
12197 static NeonGenOneOpEnvFn
* const fns
[2][2] = {
12198 { gen_helper_neon_qabs_s8
, gen_helper_neon_qneg_s8
},
12199 { gen_helper_neon_qabs_s16
, gen_helper_neon_qneg_s16
},
12201 genfn
= fns
[size
][u
];
12202 genfn(tcg_res
, cpu_env
, tcg_op
);
12205 case 0x4: /* CLS, CLZ */
12208 gen_helper_neon_clz_u8(tcg_res
, tcg_op
);
12210 gen_helper_neon_clz_u16(tcg_res
, tcg_op
);
12214 gen_helper_neon_cls_s8(tcg_res
, tcg_op
);
12216 gen_helper_neon_cls_s16(tcg_res
, tcg_op
);
12221 g_assert_not_reached();
12225 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
12228 clear_vec_high(s
, is_q
, rd
);
12231 gen_restore_rmode(tcg_rmode
, tcg_fpstatus
);
12235 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12237 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
12238 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12239 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
12240 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12241 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12242 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12244 * This actually covers two groups where scalar access is governed by
12245 * bit 28. A bunch of the instructions (float to integral) only exist
12246 * in the vector form and are un-allocated for the scalar decode. Also
12247 * in the scalar decode Q is always 1.
12249 static void disas_simd_two_reg_misc_fp16(DisasContext
*s
, uint32_t insn
)
12251 int fpop
, opcode
, a
, u
;
12255 bool only_in_vector
= false;
12258 TCGv_i32 tcg_rmode
= NULL
;
12259 TCGv_ptr tcg_fpstatus
= NULL
;
12260 bool need_fpst
= true;
12263 if (!dc_isar_feature(aa64_fp16
, s
)) {
12264 unallocated_encoding(s
);
12268 rd
= extract32(insn
, 0, 5);
12269 rn
= extract32(insn
, 5, 5);
12271 a
= extract32(insn
, 23, 1);
12272 u
= extract32(insn
, 29, 1);
12273 is_scalar
= extract32(insn
, 28, 1);
12274 is_q
= extract32(insn
, 30, 1);
12276 opcode
= extract32(insn
, 12, 5);
12277 fpop
= deposit32(opcode
, 5, 1, a
);
12278 fpop
= deposit32(fpop
, 6, 1, u
);
12281 case 0x1d: /* SCVTF */
12282 case 0x5d: /* UCVTF */
12289 elements
= (is_q
? 8 : 4);
12292 if (!fp_access_check(s
)) {
12295 handle_simd_intfp_conv(s
, rd
, rn
, elements
, !u
, 0, MO_16
);
12299 case 0x2c: /* FCMGT (zero) */
12300 case 0x2d: /* FCMEQ (zero) */
12301 case 0x2e: /* FCMLT (zero) */
12302 case 0x6c: /* FCMGE (zero) */
12303 case 0x6d: /* FCMLE (zero) */
12304 handle_2misc_fcmp_zero(s
, fpop
, is_scalar
, 0, is_q
, MO_16
, rn
, rd
);
12306 case 0x3d: /* FRECPE */
12307 case 0x3f: /* FRECPX */
12309 case 0x18: /* FRINTN */
12310 only_in_vector
= true;
12311 rmode
= FPROUNDING_TIEEVEN
;
12313 case 0x19: /* FRINTM */
12314 only_in_vector
= true;
12315 rmode
= FPROUNDING_NEGINF
;
12317 case 0x38: /* FRINTP */
12318 only_in_vector
= true;
12319 rmode
= FPROUNDING_POSINF
;
12321 case 0x39: /* FRINTZ */
12322 only_in_vector
= true;
12323 rmode
= FPROUNDING_ZERO
;
12325 case 0x58: /* FRINTA */
12326 only_in_vector
= true;
12327 rmode
= FPROUNDING_TIEAWAY
;
12329 case 0x59: /* FRINTX */
12330 case 0x79: /* FRINTI */
12331 only_in_vector
= true;
12332 /* current rounding mode */
12334 case 0x1a: /* FCVTNS */
12335 rmode
= FPROUNDING_TIEEVEN
;
12337 case 0x1b: /* FCVTMS */
12338 rmode
= FPROUNDING_NEGINF
;
12340 case 0x1c: /* FCVTAS */
12341 rmode
= FPROUNDING_TIEAWAY
;
12343 case 0x3a: /* FCVTPS */
12344 rmode
= FPROUNDING_POSINF
;
12346 case 0x3b: /* FCVTZS */
12347 rmode
= FPROUNDING_ZERO
;
12349 case 0x5a: /* FCVTNU */
12350 rmode
= FPROUNDING_TIEEVEN
;
12352 case 0x5b: /* FCVTMU */
12353 rmode
= FPROUNDING_NEGINF
;
12355 case 0x5c: /* FCVTAU */
12356 rmode
= FPROUNDING_TIEAWAY
;
12358 case 0x7a: /* FCVTPU */
12359 rmode
= FPROUNDING_POSINF
;
12361 case 0x7b: /* FCVTZU */
12362 rmode
= FPROUNDING_ZERO
;
12364 case 0x2f: /* FABS */
12365 case 0x6f: /* FNEG */
12368 case 0x7d: /* FRSQRTE */
12369 case 0x7f: /* FSQRT (vector) */
12372 unallocated_encoding(s
);
12377 /* Check additional constraints for the scalar encoding */
12380 unallocated_encoding(s
);
12383 /* FRINTxx is only in the vector form */
12384 if (only_in_vector
) {
12385 unallocated_encoding(s
);
12390 if (!fp_access_check(s
)) {
12394 if (rmode
>= 0 || need_fpst
) {
12395 tcg_fpstatus
= fpstatus_ptr(FPST_FPCR_F16
);
12399 tcg_rmode
= gen_set_rmode(rmode
, tcg_fpstatus
);
12403 TCGv_i32 tcg_op
= read_fp_hreg(s
, rn
);
12404 TCGv_i32 tcg_res
= tcg_temp_new_i32();
12407 case 0x1a: /* FCVTNS */
12408 case 0x1b: /* FCVTMS */
12409 case 0x1c: /* FCVTAS */
12410 case 0x3a: /* FCVTPS */
12411 case 0x3b: /* FCVTZS */
12412 gen_helper_advsimd_f16tosinth(tcg_res
, tcg_op
, tcg_fpstatus
);
12414 case 0x3d: /* FRECPE */
12415 gen_helper_recpe_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12417 case 0x3f: /* FRECPX */
12418 gen_helper_frecpx_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12420 case 0x5a: /* FCVTNU */
12421 case 0x5b: /* FCVTMU */
12422 case 0x5c: /* FCVTAU */
12423 case 0x7a: /* FCVTPU */
12424 case 0x7b: /* FCVTZU */
12425 gen_helper_advsimd_f16touinth(tcg_res
, tcg_op
, tcg_fpstatus
);
12427 case 0x6f: /* FNEG */
12428 tcg_gen_xori_i32(tcg_res
, tcg_op
, 0x8000);
12430 case 0x7d: /* FRSQRTE */
12431 gen_helper_rsqrte_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12434 g_assert_not_reached();
12437 /* limit any sign extension going on */
12438 tcg_gen_andi_i32(tcg_res
, tcg_res
, 0xffff);
12439 write_fp_sreg(s
, rd
, tcg_res
);
12441 for (pass
= 0; pass
< (is_q
? 8 : 4); pass
++) {
12442 TCGv_i32 tcg_op
= tcg_temp_new_i32();
12443 TCGv_i32 tcg_res
= tcg_temp_new_i32();
12445 read_vec_element_i32(s
, tcg_op
, rn
, pass
, MO_16
);
12448 case 0x1a: /* FCVTNS */
12449 case 0x1b: /* FCVTMS */
12450 case 0x1c: /* FCVTAS */
12451 case 0x3a: /* FCVTPS */
12452 case 0x3b: /* FCVTZS */
12453 gen_helper_advsimd_f16tosinth(tcg_res
, tcg_op
, tcg_fpstatus
);
12455 case 0x3d: /* FRECPE */
12456 gen_helper_recpe_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12458 case 0x5a: /* FCVTNU */
12459 case 0x5b: /* FCVTMU */
12460 case 0x5c: /* FCVTAU */
12461 case 0x7a: /* FCVTPU */
12462 case 0x7b: /* FCVTZU */
12463 gen_helper_advsimd_f16touinth(tcg_res
, tcg_op
, tcg_fpstatus
);
12465 case 0x18: /* FRINTN */
12466 case 0x19: /* FRINTM */
12467 case 0x38: /* FRINTP */
12468 case 0x39: /* FRINTZ */
12469 case 0x58: /* FRINTA */
12470 case 0x79: /* FRINTI */
12471 gen_helper_advsimd_rinth(tcg_res
, tcg_op
, tcg_fpstatus
);
12473 case 0x59: /* FRINTX */
12474 gen_helper_advsimd_rinth_exact(tcg_res
, tcg_op
, tcg_fpstatus
);
12476 case 0x2f: /* FABS */
12477 tcg_gen_andi_i32(tcg_res
, tcg_op
, 0x7fff);
12479 case 0x6f: /* FNEG */
12480 tcg_gen_xori_i32(tcg_res
, tcg_op
, 0x8000);
12482 case 0x7d: /* FRSQRTE */
12483 gen_helper_rsqrte_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12485 case 0x7f: /* FSQRT */
12486 gen_helper_sqrt_f16(tcg_res
, tcg_op
, tcg_fpstatus
);
12489 g_assert_not_reached();
12492 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_16
);
12495 clear_vec_high(s
, is_q
, rd
);
12499 gen_restore_rmode(tcg_rmode
, tcg_fpstatus
);
12503 /* AdvSIMD scalar x indexed element
12504 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12505 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12506 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12507 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12508 * AdvSIMD vector x indexed element
12509 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
12510 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12511 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
12512 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12514 static void disas_simd_indexed(DisasContext
*s
, uint32_t insn
)
12516 /* This encoding has two kinds of instruction:
12517 * normal, where we perform elt x idxelt => elt for each
12518 * element in the vector
12519 * long, where we perform elt x idxelt and generate a result of
12520 * double the width of the input element
12521 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12523 bool is_scalar
= extract32(insn
, 28, 1);
12524 bool is_q
= extract32(insn
, 30, 1);
12525 bool u
= extract32(insn
, 29, 1);
12526 int size
= extract32(insn
, 22, 2);
12527 int l
= extract32(insn
, 21, 1);
12528 int m
= extract32(insn
, 20, 1);
12529 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12530 int rm
= extract32(insn
, 16, 4);
12531 int opcode
= extract32(insn
, 12, 4);
12532 int h
= extract32(insn
, 11, 1);
12533 int rn
= extract32(insn
, 5, 5);
12534 int rd
= extract32(insn
, 0, 5);
12535 bool is_long
= false;
12537 bool is_fp16
= false;
12541 switch (16 * u
+ opcode
) {
12542 case 0x08: /* MUL */
12543 case 0x10: /* MLA */
12544 case 0x14: /* MLS */
12546 unallocated_encoding(s
);
12550 case 0x02: /* SMLAL, SMLAL2 */
12551 case 0x12: /* UMLAL, UMLAL2 */
12552 case 0x06: /* SMLSL, SMLSL2 */
12553 case 0x16: /* UMLSL, UMLSL2 */
12554 case 0x0a: /* SMULL, SMULL2 */
12555 case 0x1a: /* UMULL, UMULL2 */
12557 unallocated_encoding(s
);
12562 case 0x03: /* SQDMLAL, SQDMLAL2 */
12563 case 0x07: /* SQDMLSL, SQDMLSL2 */
12564 case 0x0b: /* SQDMULL, SQDMULL2 */
12567 case 0x0c: /* SQDMULH */
12568 case 0x0d: /* SQRDMULH */
12570 case 0x01: /* FMLA */
12571 case 0x05: /* FMLS */
12572 case 0x09: /* FMUL */
12573 case 0x19: /* FMULX */
12576 case 0x1d: /* SQRDMLAH */
12577 case 0x1f: /* SQRDMLSH */
12578 if (!dc_isar_feature(aa64_rdm
, s
)) {
12579 unallocated_encoding(s
);
12583 case 0x0e: /* SDOT */
12584 case 0x1e: /* UDOT */
12585 if (is_scalar
|| size
!= MO_32
|| !dc_isar_feature(aa64_dp
, s
)) {
12586 unallocated_encoding(s
);
12592 case 0: /* SUDOT */
12593 case 2: /* USDOT */
12594 if (is_scalar
|| !dc_isar_feature(aa64_i8mm
, s
)) {
12595 unallocated_encoding(s
);
12600 case 1: /* BFDOT */
12601 if (is_scalar
|| !dc_isar_feature(aa64_bf16
, s
)) {
12602 unallocated_encoding(s
);
12607 case 3: /* BFMLAL{B,T} */
12608 if (is_scalar
|| !dc_isar_feature(aa64_bf16
, s
)) {
12609 unallocated_encoding(s
);
12612 /* can't set is_fp without other incorrect size checks */
12616 unallocated_encoding(s
);
12620 case 0x11: /* FCMLA #0 */
12621 case 0x13: /* FCMLA #90 */
12622 case 0x15: /* FCMLA #180 */
12623 case 0x17: /* FCMLA #270 */
12624 if (is_scalar
|| !dc_isar_feature(aa64_fcma
, s
)) {
12625 unallocated_encoding(s
);
12630 case 0x00: /* FMLAL */
12631 case 0x04: /* FMLSL */
12632 case 0x18: /* FMLAL2 */
12633 case 0x1c: /* FMLSL2 */
12634 if (is_scalar
|| size
!= MO_32
|| !dc_isar_feature(aa64_fhm
, s
)) {
12635 unallocated_encoding(s
);
12639 /* is_fp, but we pass cpu_env not fp_status. */
12642 unallocated_encoding(s
);
12647 case 1: /* normal fp */
12648 /* convert insn encoded size to MemOp size */
12650 case 0: /* half-precision */
12654 case MO_32
: /* single precision */
12655 case MO_64
: /* double precision */
12658 unallocated_encoding(s
);
12663 case 2: /* complex fp */
12664 /* Each indexable element is a complex pair. */
12669 unallocated_encoding(s
);
12677 unallocated_encoding(s
);
12682 default: /* integer */
12686 unallocated_encoding(s
);
12691 if (is_fp16
&& !dc_isar_feature(aa64_fp16
, s
)) {
12692 unallocated_encoding(s
);
12696 /* Given MemOp size, adjust register and indexing. */
12699 index
= h
<< 2 | l
<< 1 | m
;
12702 index
= h
<< 1 | l
;
12707 unallocated_encoding(s
);
12714 g_assert_not_reached();
12717 if (!fp_access_check(s
)) {
12722 fpst
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
12727 switch (16 * u
+ opcode
) {
12728 case 0x0e: /* SDOT */
12729 case 0x1e: /* UDOT */
12730 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, index
,
12731 u
? gen_helper_gvec_udot_idx_b
12732 : gen_helper_gvec_sdot_idx_b
);
12735 switch (extract32(insn
, 22, 2)) {
12736 case 0: /* SUDOT */
12737 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, index
,
12738 gen_helper_gvec_sudot_idx_b
);
12740 case 1: /* BFDOT */
12741 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, index
,
12742 gen_helper_gvec_bfdot_idx
);
12744 case 2: /* USDOT */
12745 gen_gvec_op4_ool(s
, is_q
, rd
, rn
, rm
, rd
, index
,
12746 gen_helper_gvec_usdot_idx_b
);
12748 case 3: /* BFMLAL{B,T} */
12749 gen_gvec_op4_fpst(s
, 1, rd
, rn
, rm
, rd
, 0, (index
<< 1) | is_q
,
12750 gen_helper_gvec_bfmlal_idx
);
12753 g_assert_not_reached();
12754 case 0x11: /* FCMLA #0 */
12755 case 0x13: /* FCMLA #90 */
12756 case 0x15: /* FCMLA #180 */
12757 case 0x17: /* FCMLA #270 */
12759 int rot
= extract32(insn
, 13, 2);
12760 int data
= (index
<< 2) | rot
;
12761 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s
, rd
),
12762 vec_full_reg_offset(s
, rn
),
12763 vec_full_reg_offset(s
, rm
),
12764 vec_full_reg_offset(s
, rd
), fpst
,
12765 is_q
? 16 : 8, vec_full_reg_size(s
), data
,
12767 ? gen_helper_gvec_fcmlas_idx
12768 : gen_helper_gvec_fcmlah_idx
);
12772 case 0x00: /* FMLAL */
12773 case 0x04: /* FMLSL */
12774 case 0x18: /* FMLAL2 */
12775 case 0x1c: /* FMLSL2 */
12777 int is_s
= extract32(opcode
, 2, 1);
12779 int data
= (index
<< 2) | (is_2
<< 1) | is_s
;
12780 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s
, rd
),
12781 vec_full_reg_offset(s
, rn
),
12782 vec_full_reg_offset(s
, rm
), cpu_env
,
12783 is_q
? 16 : 8, vec_full_reg_size(s
),
12784 data
, gen_helper_gvec_fmlal_idx_a64
);
12788 case 0x08: /* MUL */
12789 if (!is_long
&& !is_scalar
) {
12790 static gen_helper_gvec_3
* const fns
[3] = {
12791 gen_helper_gvec_mul_idx_h
,
12792 gen_helper_gvec_mul_idx_s
,
12793 gen_helper_gvec_mul_idx_d
,
12795 tcg_gen_gvec_3_ool(vec_full_reg_offset(s
, rd
),
12796 vec_full_reg_offset(s
, rn
),
12797 vec_full_reg_offset(s
, rm
),
12798 is_q
? 16 : 8, vec_full_reg_size(s
),
12799 index
, fns
[size
- 1]);
12804 case 0x10: /* MLA */
12805 if (!is_long
&& !is_scalar
) {
12806 static gen_helper_gvec_4
* const fns
[3] = {
12807 gen_helper_gvec_mla_idx_h
,
12808 gen_helper_gvec_mla_idx_s
,
12809 gen_helper_gvec_mla_idx_d
,
12811 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
12812 vec_full_reg_offset(s
, rn
),
12813 vec_full_reg_offset(s
, rm
),
12814 vec_full_reg_offset(s
, rd
),
12815 is_q
? 16 : 8, vec_full_reg_size(s
),
12816 index
, fns
[size
- 1]);
12821 case 0x14: /* MLS */
12822 if (!is_long
&& !is_scalar
) {
12823 static gen_helper_gvec_4
* const fns
[3] = {
12824 gen_helper_gvec_mls_idx_h
,
12825 gen_helper_gvec_mls_idx_s
,
12826 gen_helper_gvec_mls_idx_d
,
12828 tcg_gen_gvec_4_ool(vec_full_reg_offset(s
, rd
),
12829 vec_full_reg_offset(s
, rn
),
12830 vec_full_reg_offset(s
, rm
),
12831 vec_full_reg_offset(s
, rd
),
12832 is_q
? 16 : 8, vec_full_reg_size(s
),
12833 index
, fns
[size
- 1]);
12840 TCGv_i64 tcg_idx
= tcg_temp_new_i64();
12843 assert(is_fp
&& is_q
&& !is_long
);
12845 read_vec_element(s
, tcg_idx
, rm
, index
, MO_64
);
12847 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
12848 TCGv_i64 tcg_op
= tcg_temp_new_i64();
12849 TCGv_i64 tcg_res
= tcg_temp_new_i64();
12851 read_vec_element(s
, tcg_op
, rn
, pass
, MO_64
);
12853 switch (16 * u
+ opcode
) {
12854 case 0x05: /* FMLS */
12855 /* As usual for ARM, separate negation for fused multiply-add */
12856 gen_helper_vfp_negd(tcg_op
, tcg_op
);
12858 case 0x01: /* FMLA */
12859 read_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
12860 gen_helper_vfp_muladdd(tcg_res
, tcg_op
, tcg_idx
, tcg_res
, fpst
);
12862 case 0x09: /* FMUL */
12863 gen_helper_vfp_muld(tcg_res
, tcg_op
, tcg_idx
, fpst
);
12865 case 0x19: /* FMULX */
12866 gen_helper_vfp_mulxd(tcg_res
, tcg_op
, tcg_idx
, fpst
);
12869 g_assert_not_reached();
12872 write_vec_element(s
, tcg_res
, rd
, pass
, MO_64
);
12875 clear_vec_high(s
, !is_scalar
, rd
);
12876 } else if (!is_long
) {
12877 /* 32 bit floating point, or 16 or 32 bit integer.
12878 * For the 16 bit scalar case we use the usual Neon helpers and
12879 * rely on the fact that 0 op 0 == 0 with no side effects.
12881 TCGv_i32 tcg_idx
= tcg_temp_new_i32();
12882 int pass
, maxpasses
;
12887 maxpasses
= is_q
? 4 : 2;
12890 read_vec_element_i32(s
, tcg_idx
, rm
, index
, size
);
12892 if (size
== 1 && !is_scalar
) {
12893 /* The simplest way to handle the 16x16 indexed ops is to duplicate
12894 * the index into both halves of the 32 bit tcg_idx and then use
12895 * the usual Neon helpers.
12897 tcg_gen_deposit_i32(tcg_idx
, tcg_idx
, tcg_idx
, 16, 16);
12900 for (pass
= 0; pass
< maxpasses
; pass
++) {
12901 TCGv_i32 tcg_op
= tcg_temp_new_i32();
12902 TCGv_i32 tcg_res
= tcg_temp_new_i32();
12904 read_vec_element_i32(s
, tcg_op
, rn
, pass
, is_scalar
? size
: MO_32
);
12906 switch (16 * u
+ opcode
) {
12907 case 0x08: /* MUL */
12908 case 0x10: /* MLA */
12909 case 0x14: /* MLS */
12911 static NeonGenTwoOpFn
* const fns
[2][2] = {
12912 { gen_helper_neon_add_u16
, gen_helper_neon_sub_u16
},
12913 { tcg_gen_add_i32
, tcg_gen_sub_i32
},
12915 NeonGenTwoOpFn
*genfn
;
12916 bool is_sub
= opcode
== 0x4;
12919 gen_helper_neon_mul_u16(tcg_res
, tcg_op
, tcg_idx
);
12921 tcg_gen_mul_i32(tcg_res
, tcg_op
, tcg_idx
);
12923 if (opcode
== 0x8) {
12926 read_vec_element_i32(s
, tcg_op
, rd
, pass
, MO_32
);
12927 genfn
= fns
[size
- 1][is_sub
];
12928 genfn(tcg_res
, tcg_op
, tcg_res
);
12931 case 0x05: /* FMLS */
12932 case 0x01: /* FMLA */
12933 read_vec_element_i32(s
, tcg_res
, rd
, pass
,
12934 is_scalar
? size
: MO_32
);
12937 if (opcode
== 0x5) {
12938 /* As usual for ARM, separate negation for fused
12940 tcg_gen_xori_i32(tcg_op
, tcg_op
, 0x80008000);
12943 gen_helper_advsimd_muladdh(tcg_res
, tcg_op
, tcg_idx
,
12946 gen_helper_advsimd_muladd2h(tcg_res
, tcg_op
, tcg_idx
,
12951 if (opcode
== 0x5) {
12952 /* As usual for ARM, separate negation for
12953 * fused multiply-add */
12954 tcg_gen_xori_i32(tcg_op
, tcg_op
, 0x80000000);
12956 gen_helper_vfp_muladds(tcg_res
, tcg_op
, tcg_idx
,
12960 g_assert_not_reached();
12963 case 0x09: /* FMUL */
12967 gen_helper_advsimd_mulh(tcg_res
, tcg_op
,
12970 gen_helper_advsimd_mul2h(tcg_res
, tcg_op
,
12975 gen_helper_vfp_muls(tcg_res
, tcg_op
, tcg_idx
, fpst
);
12978 g_assert_not_reached();
12981 case 0x19: /* FMULX */
12985 gen_helper_advsimd_mulxh(tcg_res
, tcg_op
,
12988 gen_helper_advsimd_mulx2h(tcg_res
, tcg_op
,
12993 gen_helper_vfp_mulxs(tcg_res
, tcg_op
, tcg_idx
, fpst
);
12996 g_assert_not_reached();
12999 case 0x0c: /* SQDMULH */
13001 gen_helper_neon_qdmulh_s16(tcg_res
, cpu_env
,
13004 gen_helper_neon_qdmulh_s32(tcg_res
, cpu_env
,
13008 case 0x0d: /* SQRDMULH */
13010 gen_helper_neon_qrdmulh_s16(tcg_res
, cpu_env
,
13013 gen_helper_neon_qrdmulh_s32(tcg_res
, cpu_env
,
13017 case 0x1d: /* SQRDMLAH */
13018 read_vec_element_i32(s
, tcg_res
, rd
, pass
,
13019 is_scalar
? size
: MO_32
);
13021 gen_helper_neon_qrdmlah_s16(tcg_res
, cpu_env
,
13022 tcg_op
, tcg_idx
, tcg_res
);
13024 gen_helper_neon_qrdmlah_s32(tcg_res
, cpu_env
,
13025 tcg_op
, tcg_idx
, tcg_res
);
13028 case 0x1f: /* SQRDMLSH */
13029 read_vec_element_i32(s
, tcg_res
, rd
, pass
,
13030 is_scalar
? size
: MO_32
);
13032 gen_helper_neon_qrdmlsh_s16(tcg_res
, cpu_env
,
13033 tcg_op
, tcg_idx
, tcg_res
);
13035 gen_helper_neon_qrdmlsh_s32(tcg_res
, cpu_env
,
13036 tcg_op
, tcg_idx
, tcg_res
);
13040 g_assert_not_reached();
13044 write_fp_sreg(s
, rd
, tcg_res
);
13046 write_vec_element_i32(s
, tcg_res
, rd
, pass
, MO_32
);
13050 clear_vec_high(s
, is_q
, rd
);
13052 /* long ops: 16x16->32 or 32x32->64 */
13053 TCGv_i64 tcg_res
[2];
13055 bool satop
= extract32(opcode
, 0, 1);
13056 MemOp memop
= MO_32
;
13063 TCGv_i64 tcg_idx
= tcg_temp_new_i64();
13065 read_vec_element(s
, tcg_idx
, rm
, index
, memop
);
13067 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
13068 TCGv_i64 tcg_op
= tcg_temp_new_i64();
13069 TCGv_i64 tcg_passres
;
13075 passelt
= pass
+ (is_q
* 2);
13078 read_vec_element(s
, tcg_op
, rn
, passelt
, memop
);
13080 tcg_res
[pass
] = tcg_temp_new_i64();
13082 if (opcode
== 0xa || opcode
== 0xb) {
13083 /* Non-accumulating ops */
13084 tcg_passres
= tcg_res
[pass
];
13086 tcg_passres
= tcg_temp_new_i64();
13089 tcg_gen_mul_i64(tcg_passres
, tcg_op
, tcg_idx
);
13092 /* saturating, doubling */
13093 gen_helper_neon_addl_saturate_s64(tcg_passres
, cpu_env
,
13094 tcg_passres
, tcg_passres
);
13097 if (opcode
== 0xa || opcode
== 0xb) {
13101 /* Accumulating op: handle accumulate step */
13102 read_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
13105 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13106 tcg_gen_add_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_passres
);
13108 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13109 tcg_gen_sub_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_passres
);
13111 case 0x7: /* SQDMLSL, SQDMLSL2 */
13112 tcg_gen_neg_i64(tcg_passres
, tcg_passres
);
13114 case 0x3: /* SQDMLAL, SQDMLAL2 */
13115 gen_helper_neon_addl_saturate_s64(tcg_res
[pass
], cpu_env
,
13120 g_assert_not_reached();
13124 clear_vec_high(s
, !is_scalar
, rd
);
13126 TCGv_i32 tcg_idx
= tcg_temp_new_i32();
13129 read_vec_element_i32(s
, tcg_idx
, rm
, index
, size
);
13132 /* The simplest way to handle the 16x16 indexed ops is to
13133 * duplicate the index into both halves of the 32 bit tcg_idx
13134 * and then use the usual Neon helpers.
13136 tcg_gen_deposit_i32(tcg_idx
, tcg_idx
, tcg_idx
, 16, 16);
13139 for (pass
= 0; pass
< (is_scalar
? 1 : 2); pass
++) {
13140 TCGv_i32 tcg_op
= tcg_temp_new_i32();
13141 TCGv_i64 tcg_passres
;
13144 read_vec_element_i32(s
, tcg_op
, rn
, pass
, size
);
13146 read_vec_element_i32(s
, tcg_op
, rn
,
13147 pass
+ (is_q
* 2), MO_32
);
13150 tcg_res
[pass
] = tcg_temp_new_i64();
13152 if (opcode
== 0xa || opcode
== 0xb) {
13153 /* Non-accumulating ops */
13154 tcg_passres
= tcg_res
[pass
];
13156 tcg_passres
= tcg_temp_new_i64();
13159 if (memop
& MO_SIGN
) {
13160 gen_helper_neon_mull_s16(tcg_passres
, tcg_op
, tcg_idx
);
13162 gen_helper_neon_mull_u16(tcg_passres
, tcg_op
, tcg_idx
);
13165 gen_helper_neon_addl_saturate_s32(tcg_passres
, cpu_env
,
13166 tcg_passres
, tcg_passres
);
13169 if (opcode
== 0xa || opcode
== 0xb) {
13173 /* Accumulating op: handle accumulate step */
13174 read_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
13177 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13178 gen_helper_neon_addl_u32(tcg_res
[pass
], tcg_res
[pass
],
13181 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13182 gen_helper_neon_subl_u32(tcg_res
[pass
], tcg_res
[pass
],
13185 case 0x7: /* SQDMLSL, SQDMLSL2 */
13186 gen_helper_neon_negl_u32(tcg_passres
, tcg_passres
);
13188 case 0x3: /* SQDMLAL, SQDMLAL2 */
13189 gen_helper_neon_addl_saturate_s32(tcg_res
[pass
], cpu_env
,
13194 g_assert_not_reached();
13199 tcg_gen_ext32u_i64(tcg_res
[0], tcg_res
[0]);
13204 tcg_res
[1] = tcg_constant_i64(0);
13207 for (pass
= 0; pass
< 2; pass
++) {
13208 write_vec_element(s
, tcg_res
[pass
], rd
, pass
, MO_64
);
13214 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13215 * +-----------------+------+-----------+--------+-----+------+------+
13216 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13217 * +-----------------+------+-----------+--------+-----+------+------+
13219 static void disas_crypto_aes(DisasContext
*s
, uint32_t insn
)
13221 int size
= extract32(insn
, 22, 2);
13222 int opcode
= extract32(insn
, 12, 5);
13223 int rn
= extract32(insn
, 5, 5);
13224 int rd
= extract32(insn
, 0, 5);
13225 gen_helper_gvec_2
*genfn2
= NULL
;
13226 gen_helper_gvec_3
*genfn3
= NULL
;
13228 if (!dc_isar_feature(aa64_aes
, s
) || size
!= 0) {
13229 unallocated_encoding(s
);
13234 case 0x4: /* AESE */
13235 genfn3
= gen_helper_crypto_aese
;
13237 case 0x6: /* AESMC */
13238 genfn2
= gen_helper_crypto_aesmc
;
13240 case 0x5: /* AESD */
13241 genfn3
= gen_helper_crypto_aesd
;
13243 case 0x7: /* AESIMC */
13244 genfn2
= gen_helper_crypto_aesimc
;
13247 unallocated_encoding(s
);
13251 if (!fp_access_check(s
)) {
13255 gen_gvec_op2_ool(s
, true, rd
, rn
, 0, genfn2
);
13257 gen_gvec_op3_ool(s
, true, rd
, rd
, rn
, 0, genfn3
);
13261 /* Crypto three-reg SHA
13262 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
13263 * +-----------------+------+---+------+---+--------+-----+------+------+
13264 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
13265 * +-----------------+------+---+------+---+--------+-----+------+------+
13267 static void disas_crypto_three_reg_sha(DisasContext
*s
, uint32_t insn
)
13269 int size
= extract32(insn
, 22, 2);
13270 int opcode
= extract32(insn
, 12, 3);
13271 int rm
= extract32(insn
, 16, 5);
13272 int rn
= extract32(insn
, 5, 5);
13273 int rd
= extract32(insn
, 0, 5);
13274 gen_helper_gvec_3
*genfn
;
13278 unallocated_encoding(s
);
13283 case 0: /* SHA1C */
13284 genfn
= gen_helper_crypto_sha1c
;
13285 feature
= dc_isar_feature(aa64_sha1
, s
);
13287 case 1: /* SHA1P */
13288 genfn
= gen_helper_crypto_sha1p
;
13289 feature
= dc_isar_feature(aa64_sha1
, s
);
13291 case 2: /* SHA1M */
13292 genfn
= gen_helper_crypto_sha1m
;
13293 feature
= dc_isar_feature(aa64_sha1
, s
);
13295 case 3: /* SHA1SU0 */
13296 genfn
= gen_helper_crypto_sha1su0
;
13297 feature
= dc_isar_feature(aa64_sha1
, s
);
13299 case 4: /* SHA256H */
13300 genfn
= gen_helper_crypto_sha256h
;
13301 feature
= dc_isar_feature(aa64_sha256
, s
);
13303 case 5: /* SHA256H2 */
13304 genfn
= gen_helper_crypto_sha256h2
;
13305 feature
= dc_isar_feature(aa64_sha256
, s
);
13307 case 6: /* SHA256SU1 */
13308 genfn
= gen_helper_crypto_sha256su1
;
13309 feature
= dc_isar_feature(aa64_sha256
, s
);
13312 unallocated_encoding(s
);
13317 unallocated_encoding(s
);
13321 if (!fp_access_check(s
)) {
13324 gen_gvec_op3_ool(s
, true, rd
, rn
, rm
, 0, genfn
);
13327 /* Crypto two-reg SHA
13328 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
13329 * +-----------------+------+-----------+--------+-----+------+------+
13330 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
13331 * +-----------------+------+-----------+--------+-----+------+------+
13333 static void disas_crypto_two_reg_sha(DisasContext
*s
, uint32_t insn
)
13335 int size
= extract32(insn
, 22, 2);
13336 int opcode
= extract32(insn
, 12, 5);
13337 int rn
= extract32(insn
, 5, 5);
13338 int rd
= extract32(insn
, 0, 5);
13339 gen_helper_gvec_2
*genfn
;
13343 unallocated_encoding(s
);
13348 case 0: /* SHA1H */
13349 feature
= dc_isar_feature(aa64_sha1
, s
);
13350 genfn
= gen_helper_crypto_sha1h
;
13352 case 1: /* SHA1SU1 */
13353 feature
= dc_isar_feature(aa64_sha1
, s
);
13354 genfn
= gen_helper_crypto_sha1su1
;
13356 case 2: /* SHA256SU0 */
13357 feature
= dc_isar_feature(aa64_sha256
, s
);
13358 genfn
= gen_helper_crypto_sha256su0
;
13361 unallocated_encoding(s
);
13366 unallocated_encoding(s
);
13370 if (!fp_access_check(s
)) {
13373 gen_gvec_op2_ool(s
, true, rd
, rn
, 0, genfn
);
13376 static void gen_rax1_i64(TCGv_i64 d
, TCGv_i64 n
, TCGv_i64 m
)
13378 tcg_gen_rotli_i64(d
, m
, 1);
13379 tcg_gen_xor_i64(d
, d
, n
);
13382 static void gen_rax1_vec(unsigned vece
, TCGv_vec d
, TCGv_vec n
, TCGv_vec m
)
13384 tcg_gen_rotli_vec(vece
, d
, m
, 1);
13385 tcg_gen_xor_vec(vece
, d
, d
, n
);
13388 void gen_gvec_rax1(unsigned vece
, uint32_t rd_ofs
, uint32_t rn_ofs
,
13389 uint32_t rm_ofs
, uint32_t opr_sz
, uint32_t max_sz
)
13391 static const TCGOpcode vecop_list
[] = { INDEX_op_rotli_vec
, 0 };
13392 static const GVecGen3 op
= {
13393 .fni8
= gen_rax1_i64
,
13394 .fniv
= gen_rax1_vec
,
13395 .opt_opc
= vecop_list
,
13396 .fno
= gen_helper_crypto_rax1
,
13399 tcg_gen_gvec_3(rd_ofs
, rn_ofs
, rm_ofs
, opr_sz
, max_sz
, &op
);
13402 /* Crypto three-reg SHA512
13403 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13404 * +-----------------------+------+---+---+-----+--------+------+------+
13405 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd |
13406 * +-----------------------+------+---+---+-----+--------+------+------+
13408 static void disas_crypto_three_reg_sha512(DisasContext
*s
, uint32_t insn
)
13410 int opcode
= extract32(insn
, 10, 2);
13411 int o
= extract32(insn
, 14, 1);
13412 int rm
= extract32(insn
, 16, 5);
13413 int rn
= extract32(insn
, 5, 5);
13414 int rd
= extract32(insn
, 0, 5);
13416 gen_helper_gvec_3
*oolfn
= NULL
;
13417 GVecGen3Fn
*gvecfn
= NULL
;
13421 case 0: /* SHA512H */
13422 feature
= dc_isar_feature(aa64_sha512
, s
);
13423 oolfn
= gen_helper_crypto_sha512h
;
13425 case 1: /* SHA512H2 */
13426 feature
= dc_isar_feature(aa64_sha512
, s
);
13427 oolfn
= gen_helper_crypto_sha512h2
;
13429 case 2: /* SHA512SU1 */
13430 feature
= dc_isar_feature(aa64_sha512
, s
);
13431 oolfn
= gen_helper_crypto_sha512su1
;
13434 feature
= dc_isar_feature(aa64_sha3
, s
);
13435 gvecfn
= gen_gvec_rax1
;
13438 g_assert_not_reached();
13442 case 0: /* SM3PARTW1 */
13443 feature
= dc_isar_feature(aa64_sm3
, s
);
13444 oolfn
= gen_helper_crypto_sm3partw1
;
13446 case 1: /* SM3PARTW2 */
13447 feature
= dc_isar_feature(aa64_sm3
, s
);
13448 oolfn
= gen_helper_crypto_sm3partw2
;
13450 case 2: /* SM4EKEY */
13451 feature
= dc_isar_feature(aa64_sm4
, s
);
13452 oolfn
= gen_helper_crypto_sm4ekey
;
13455 unallocated_encoding(s
);
13461 unallocated_encoding(s
);
13465 if (!fp_access_check(s
)) {
13470 gen_gvec_op3_ool(s
, true, rd
, rn
, rm
, 0, oolfn
);
13472 gen_gvec_fn3(s
, true, rd
, rn
, rm
, gvecfn
, MO_64
);
13476 /* Crypto two-reg SHA512
13477 * 31 12 11 10 9 5 4 0
13478 * +-----------------------------------------+--------+------+------+
13479 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd |
13480 * +-----------------------------------------+--------+------+------+
13482 static void disas_crypto_two_reg_sha512(DisasContext
*s
, uint32_t insn
)
13484 int opcode
= extract32(insn
, 10, 2);
13485 int rn
= extract32(insn
, 5, 5);
13486 int rd
= extract32(insn
, 0, 5);
13490 case 0: /* SHA512SU0 */
13491 feature
= dc_isar_feature(aa64_sha512
, s
);
13494 feature
= dc_isar_feature(aa64_sm4
, s
);
13497 unallocated_encoding(s
);
13502 unallocated_encoding(s
);
13506 if (!fp_access_check(s
)) {
13511 case 0: /* SHA512SU0 */
13512 gen_gvec_op2_ool(s
, true, rd
, rn
, 0, gen_helper_crypto_sha512su0
);
13515 gen_gvec_op3_ool(s
, true, rd
, rd
, rn
, 0, gen_helper_crypto_sm4e
);
13518 g_assert_not_reached();
13522 /* Crypto four-register
13523 * 31 23 22 21 20 16 15 14 10 9 5 4 0
13524 * +-------------------+-----+------+---+------+------+------+
13525 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd |
13526 * +-------------------+-----+------+---+------+------+------+
13528 static void disas_crypto_four_reg(DisasContext
*s
, uint32_t insn
)
13530 int op0
= extract32(insn
, 21, 2);
13531 int rm
= extract32(insn
, 16, 5);
13532 int ra
= extract32(insn
, 10, 5);
13533 int rn
= extract32(insn
, 5, 5);
13534 int rd
= extract32(insn
, 0, 5);
13540 feature
= dc_isar_feature(aa64_sha3
, s
);
13542 case 2: /* SM3SS1 */
13543 feature
= dc_isar_feature(aa64_sm3
, s
);
13546 unallocated_encoding(s
);
13551 unallocated_encoding(s
);
13555 if (!fp_access_check(s
)) {
13560 TCGv_i64 tcg_op1
, tcg_op2
, tcg_op3
, tcg_res
[2];
13563 tcg_op1
= tcg_temp_new_i64();
13564 tcg_op2
= tcg_temp_new_i64();
13565 tcg_op3
= tcg_temp_new_i64();
13566 tcg_res
[0] = tcg_temp_new_i64();
13567 tcg_res
[1] = tcg_temp_new_i64();
13569 for (pass
= 0; pass
< 2; pass
++) {
13570 read_vec_element(s
, tcg_op1
, rn
, pass
, MO_64
);
13571 read_vec_element(s
, tcg_op2
, rm
, pass
, MO_64
);
13572 read_vec_element(s
, tcg_op3
, ra
, pass
, MO_64
);
13576 tcg_gen_xor_i64(tcg_res
[pass
], tcg_op2
, tcg_op3
);
13579 tcg_gen_andc_i64(tcg_res
[pass
], tcg_op2
, tcg_op3
);
13581 tcg_gen_xor_i64(tcg_res
[pass
], tcg_res
[pass
], tcg_op1
);
13583 write_vec_element(s
, tcg_res
[0], rd
, 0, MO_64
);
13584 write_vec_element(s
, tcg_res
[1], rd
, 1, MO_64
);
13586 TCGv_i32 tcg_op1
, tcg_op2
, tcg_op3
, tcg_res
, tcg_zero
;
13588 tcg_op1
= tcg_temp_new_i32();
13589 tcg_op2
= tcg_temp_new_i32();
13590 tcg_op3
= tcg_temp_new_i32();
13591 tcg_res
= tcg_temp_new_i32();
13592 tcg_zero
= tcg_constant_i32(0);
13594 read_vec_element_i32(s
, tcg_op1
, rn
, 3, MO_32
);
13595 read_vec_element_i32(s
, tcg_op2
, rm
, 3, MO_32
);
13596 read_vec_element_i32(s
, tcg_op3
, ra
, 3, MO_32
);
13598 tcg_gen_rotri_i32(tcg_res
, tcg_op1
, 20);
13599 tcg_gen_add_i32(tcg_res
, tcg_res
, tcg_op2
);
13600 tcg_gen_add_i32(tcg_res
, tcg_res
, tcg_op3
);
13601 tcg_gen_rotri_i32(tcg_res
, tcg_res
, 25);
13603 write_vec_element_i32(s
, tcg_zero
, rd
, 0, MO_32
);
13604 write_vec_element_i32(s
, tcg_zero
, rd
, 1, MO_32
);
13605 write_vec_element_i32(s
, tcg_zero
, rd
, 2, MO_32
);
13606 write_vec_element_i32(s
, tcg_res
, rd
, 3, MO_32
);
13611 * 31 21 20 16 15 10 9 5 4 0
13612 * +-----------------------+------+--------+------+------+
13613 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd |
13614 * +-----------------------+------+--------+------+------+
13616 static void disas_crypto_xar(DisasContext
*s
, uint32_t insn
)
13618 int rm
= extract32(insn
, 16, 5);
13619 int imm6
= extract32(insn
, 10, 6);
13620 int rn
= extract32(insn
, 5, 5);
13621 int rd
= extract32(insn
, 0, 5);
13623 if (!dc_isar_feature(aa64_sha3
, s
)) {
13624 unallocated_encoding(s
);
13628 if (!fp_access_check(s
)) {
13632 gen_gvec_xar(MO_64
, vec_full_reg_offset(s
, rd
),
13633 vec_full_reg_offset(s
, rn
),
13634 vec_full_reg_offset(s
, rm
), imm6
, 16,
13635 vec_full_reg_size(s
));
13638 /* Crypto three-reg imm2
13639 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0
13640 * +-----------------------+------+-----+------+--------+------+------+
13641 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd |
13642 * +-----------------------+------+-----+------+--------+------+------+
13644 static void disas_crypto_three_reg_imm2(DisasContext
*s
, uint32_t insn
)
13646 static gen_helper_gvec_3
* const fns
[4] = {
13647 gen_helper_crypto_sm3tt1a
, gen_helper_crypto_sm3tt1b
,
13648 gen_helper_crypto_sm3tt2a
, gen_helper_crypto_sm3tt2b
,
13650 int opcode
= extract32(insn
, 10, 2);
13651 int imm2
= extract32(insn
, 12, 2);
13652 int rm
= extract32(insn
, 16, 5);
13653 int rn
= extract32(insn
, 5, 5);
13654 int rd
= extract32(insn
, 0, 5);
13656 if (!dc_isar_feature(aa64_sm3
, s
)) {
13657 unallocated_encoding(s
);
13661 if (!fp_access_check(s
)) {
13665 gen_gvec_op3_ool(s
, true, rd
, rn
, rm
, imm2
, fns
[opcode
]);
13668 /* C3.6 Data processing - SIMD, inc Crypto
13670 * As the decode gets a little complex we are using a table based
13671 * approach for this part of the decode.
13673 static const AArch64DecodeTable data_proc_simd
[] = {
13674 /* pattern , mask , fn */
13675 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same
},
13676 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra
},
13677 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff
},
13678 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc
},
13679 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes
},
13680 { 0x0e000400, 0x9fe08400, disas_simd_copy
},
13681 { 0x0f000000, 0x9f000400, disas_simd_indexed
}, /* vector indexed */
13682 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13683 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm
},
13684 { 0x0f000400, 0x9f800400, disas_simd_shift_imm
},
13685 { 0x0e000000, 0xbf208c00, disas_simd_tb
},
13686 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn
},
13687 { 0x2e000000, 0xbf208400, disas_simd_ext
},
13688 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same
},
13689 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra
},
13690 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff
},
13691 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc
},
13692 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise
},
13693 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy
},
13694 { 0x5f000000, 0xdf000400, disas_simd_indexed
}, /* scalar indexed */
13695 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm
},
13696 { 0x4e280800, 0xff3e0c00, disas_crypto_aes
},
13697 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha
},
13698 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha
},
13699 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512
},
13700 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512
},
13701 { 0xce000000, 0xff808000, disas_crypto_four_reg
},
13702 { 0xce800000, 0xffe00000, disas_crypto_xar
},
13703 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2
},
13704 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16
},
13705 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16
},
13706 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16
},
13707 { 0x00000000, 0x00000000, NULL
}
13710 static void disas_data_proc_simd(DisasContext
*s
, uint32_t insn
)
13712 /* Note that this is called with all non-FP cases from
13713 * table C3-6 so it must UNDEF for entries not specifically
13714 * allocated to instructions in that table.
13716 AArch64DecodeFn
*fn
= lookup_disas_fn(&data_proc_simd
[0], insn
);
13720 unallocated_encoding(s
);
13724 /* C3.6 Data processing - SIMD and floating point */
13725 static void disas_data_proc_simd_fp(DisasContext
*s
, uint32_t insn
)
13727 if (extract32(insn
, 28, 1) == 1 && extract32(insn
, 30, 1) == 0) {
13728 disas_data_proc_fp(s
, insn
);
13730 /* SIMD, including crypto */
13731 disas_data_proc_simd(s
, insn
);
13735 static bool trans_OK(DisasContext
*s
, arg_OK
*a
)
13740 static bool trans_FAIL(DisasContext
*s
, arg_OK
*a
)
13742 s
->is_nonstreaming
= true;
13748 * @env: The cpu environment
13749 * @s: The DisasContext
13751 * Return true if the page is guarded.
13753 static bool is_guarded_page(CPUARMState
*env
, DisasContext
*s
)
13755 uint64_t addr
= s
->base
.pc_first
;
13756 #ifdef CONFIG_USER_ONLY
13757 return page_get_flags(addr
) & PAGE_BTI
;
13759 CPUTLBEntryFull
*full
;
13761 int mmu_idx
= arm_to_core_mmu_idx(s
->mmu_idx
);
13765 * We test this immediately after reading an insn, which means
13766 * that the TLB entry must be present and valid, and thus this
13767 * access will never raise an exception.
13769 flags
= probe_access_full(env
, addr
, 0, MMU_INST_FETCH
, mmu_idx
,
13770 false, &host
, &full
, 0);
13771 assert(!(flags
& TLB_INVALID_MASK
));
13773 return full
->guarded
;
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space: CRm:op2 is in bits [11:5]. */
        switch ((insn >> 5) & 0x7f) {
        case 0x19: /* PACIASP */
        case 0x1b: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0x20: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0x22: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0x24: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0x26: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}
13829 /* C3.1 A64 instruction index by encoding */
13830 static void disas_a64_legacy(DisasContext
*s
, uint32_t insn
)
13832 switch (extract32(insn
, 25, 4)) {
13834 case 0xd: /* Data processing - register */
13835 disas_data_proc_reg(s
, insn
);
13838 case 0xf: /* Data processing - SIMD and floating point */
13839 disas_data_proc_simd_fp(s
, insn
);
13842 unallocated_encoding(s
);
13847 static void aarch64_tr_init_disas_context(DisasContextBase
*dcbase
,
13850 DisasContext
*dc
= container_of(dcbase
, DisasContext
, base
);
13851 CPUARMState
*env
= cpu
->env_ptr
;
13852 ARMCPU
*arm_cpu
= env_archcpu(env
);
13853 CPUARMTBFlags tb_flags
= arm_tbflags_from_tb(dc
->base
.tb
);
13854 int bound
, core_mmu_idx
;
13856 dc
->isar
= &arm_cpu
->isar
;
13858 dc
->pc_save
= dc
->base
.pc_first
;
13859 dc
->aarch64
= true;
13862 dc
->be_data
= EX_TBFLAG_ANY(tb_flags
, BE_DATA
) ? MO_BE
: MO_LE
;
13863 dc
->condexec_mask
= 0;
13864 dc
->condexec_cond
= 0;
13865 core_mmu_idx
= EX_TBFLAG_ANY(tb_flags
, MMUIDX
);
13866 dc
->mmu_idx
= core_to_aa64_mmu_idx(core_mmu_idx
);
13867 dc
->tbii
= EX_TBFLAG_A64(tb_flags
, TBII
);
13868 dc
->tbid
= EX_TBFLAG_A64(tb_flags
, TBID
);
13869 dc
->tcma
= EX_TBFLAG_A64(tb_flags
, TCMA
);
13870 dc
->current_el
= arm_mmu_idx_to_el(dc
->mmu_idx
);
13871 #if !defined(CONFIG_USER_ONLY)
13872 dc
->user
= (dc
->current_el
== 0);
13874 dc
->fp_excp_el
= EX_TBFLAG_ANY(tb_flags
, FPEXC_EL
);
13875 dc
->align_mem
= EX_TBFLAG_ANY(tb_flags
, ALIGN_MEM
);
13876 dc
->pstate_il
= EX_TBFLAG_ANY(tb_flags
, PSTATE__IL
);
13877 dc
->fgt_active
= EX_TBFLAG_ANY(tb_flags
, FGT_ACTIVE
);
13878 dc
->fgt_svc
= EX_TBFLAG_ANY(tb_flags
, FGT_SVC
);
13879 dc
->fgt_eret
= EX_TBFLAG_A64(tb_flags
, FGT_ERET
);
13880 dc
->sve_excp_el
= EX_TBFLAG_A64(tb_flags
, SVEEXC_EL
);
13881 dc
->sme_excp_el
= EX_TBFLAG_A64(tb_flags
, SMEEXC_EL
);
13882 dc
->vl
= (EX_TBFLAG_A64(tb_flags
, VL
) + 1) * 16;
13883 dc
->svl
= (EX_TBFLAG_A64(tb_flags
, SVL
) + 1) * 16;
13884 dc
->pauth_active
= EX_TBFLAG_A64(tb_flags
, PAUTH_ACTIVE
);
13885 dc
->bt
= EX_TBFLAG_A64(tb_flags
, BT
);
13886 dc
->btype
= EX_TBFLAG_A64(tb_flags
, BTYPE
);
13887 dc
->unpriv
= EX_TBFLAG_A64(tb_flags
, UNPRIV
);
13888 dc
->ata
= EX_TBFLAG_A64(tb_flags
, ATA
);
13889 dc
->mte_active
[0] = EX_TBFLAG_A64(tb_flags
, MTE_ACTIVE
);
13890 dc
->mte_active
[1] = EX_TBFLAG_A64(tb_flags
, MTE0_ACTIVE
);
13891 dc
->pstate_sm
= EX_TBFLAG_A64(tb_flags
, PSTATE_SM
);
13892 dc
->pstate_za
= EX_TBFLAG_A64(tb_flags
, PSTATE_ZA
);
13893 dc
->sme_trap_nonstreaming
= EX_TBFLAG_A64(tb_flags
, SME_TRAP_NONSTREAMING
);
13894 dc
->naa
= EX_TBFLAG_A64(tb_flags
, NAA
);
13896 dc
->vec_stride
= 0;
13897 dc
->cp_regs
= arm_cpu
->cp_regs
;
13898 dc
->features
= env
->features
;
13899 dc
->dcz_blocksize
= arm_cpu
->dcz_blocksize
;
13900 dc
->gm_blocksize
= arm_cpu
->gm_blocksize
;
13902 #ifdef CONFIG_USER_ONLY
13903 /* In sve_probe_page, we assume TBI is enabled. */
13904 tcg_debug_assert(dc
->tbid
& 1);
13907 dc
->lse2
= dc_isar_feature(aa64_lse2
, dc
);
13909 /* Single step state. The code-generation logic here is:
13911 * generate code with no special handling for single-stepping (except
13912 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13913 * this happens anyway because those changes are all system register or
13915 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13916 * emit code for one insn
13917 * emit code to clear PSTATE.SS
13918 * emit code to generate software step exception for completed step
13919 * end TB (as usual for having generated an exception)
13920 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13921 * emit code to generate a software step exception
13924 dc
->ss_active
= EX_TBFLAG_ANY(tb_flags
, SS_ACTIVE
);
13925 dc
->pstate_ss
= EX_TBFLAG_ANY(tb_flags
, PSTATE__SS
);
13926 dc
->is_ldex
= false;
13928 /* Bound the number of insns to execute to those left on the page. */
13929 bound
= -(dc
->base
.pc_first
| TARGET_PAGE_MASK
) / 4;
13931 /* If architectural single step active, limit to 1. */
13932 if (dc
->ss_active
) {
13935 dc
->base
.max_insns
= MIN(dc
->base
.max_insns
, bound
);
13938 static void aarch64_tr_tb_start(DisasContextBase
*db
, CPUState
*cpu
)
13942 static void aarch64_tr_insn_start(DisasContextBase
*dcbase
, CPUState
*cpu
)
13944 DisasContext
*dc
= container_of(dcbase
, DisasContext
, base
);
13945 target_ulong pc_arg
= dc
->base
.pc_next
;
13947 if (tb_cflags(dcbase
->tb
) & CF_PCREL
) {
13948 pc_arg
&= ~TARGET_PAGE_MASK
;
13950 tcg_gen_insn_start(pc_arg
, 0, 0);
13951 dc
->insn_start
= tcg_last_op();
13954 static void aarch64_tr_translate_insn(DisasContextBase
*dcbase
, CPUState
*cpu
)
13956 DisasContext
*s
= container_of(dcbase
, DisasContext
, base
);
13957 CPUARMState
*env
= cpu
->env_ptr
;
13958 uint64_t pc
= s
->base
.pc_next
;
13961 /* Singlestep exceptions have the highest priority. */
13962 if (s
->ss_active
&& !s
->pstate_ss
) {
13963 /* Singlestep state is Active-pending.
13964 * If we're in this state at the start of a TB then either
13965 * a) we just took an exception to an EL which is being debugged
13966 * and this is the first insn in the exception handler
13967 * b) debug exceptions were masked and we just unmasked them
13968 * without changing EL (eg by clearing PSTATE.D)
13969 * In either case we're going to take a swstep exception in the
13970 * "did not step an insn" case, and so the syndrome ISV and EX
13971 * bits should be zero.
13973 assert(s
->base
.num_insns
== 1);
13974 gen_swstep_exception(s
, 0, 0);
13975 s
->base
.is_jmp
= DISAS_NORETURN
;
13976 s
->base
.pc_next
= pc
+ 4;
13982 * PC alignment fault. This has priority over the instruction abort
13983 * that we would receive from a translation fault via arm_ldl_code.
13984 * This should only be possible after an indirect branch, at the
13987 assert(s
->base
.num_insns
== 1);
13988 gen_helper_exception_pc_alignment(cpu_env
, tcg_constant_tl(pc
));
13989 s
->base
.is_jmp
= DISAS_NORETURN
;
13990 s
->base
.pc_next
= QEMU_ALIGN_UP(pc
, 4);
13995 insn
= arm_ldl_code(env
, &s
->base
, pc
, s
->sctlr_b
);
13997 s
->base
.pc_next
= pc
+ 4;
13999 s
->fp_access_checked
= false;
14000 s
->sve_access_checked
= false;
14002 if (s
->pstate_il
) {
14004 * Illegal execution state. This has priority over BTI
14005 * exceptions, but comes after instruction abort exceptions.
14007 gen_exception_insn(s
, 0, EXCP_UDEF
, syn_illegalstate());
14011 if (dc_isar_feature(aa64_bti
, s
)) {
14012 if (s
->base
.num_insns
== 1) {
14014 * At the first insn of the TB, compute s->guarded_page.
14015 * We delayed computing this until successfully reading
14016 * the first insn of the TB, above. This (mostly) ensures
14017 * that the softmmu tlb entry has been populated, and the
14018 * page table GP bit is available.
14020 * Note that we need to compute this even if btype == 0,
14021 * because this value is used for BR instructions later
14022 * where ENV is not available.
14024 s
->guarded_page
= is_guarded_page(env
, s
);
14026 /* First insn can have btype set to non-zero. */
14027 tcg_debug_assert(s
->btype
>= 0);
14030 * Note that the Branch Target Exception has fairly high
14031 * priority -- below debugging exceptions but above most
14032 * everything else. This allows us to handle this now
14033 * instead of waiting until the insn is otherwise decoded.
14037 && !btype_destination_ok(insn
, s
->bt
, s
->btype
)) {
14038 gen_exception_insn(s
, 0, EXCP_UDEF
, syn_btitrap(s
->btype
));
14042 /* Not the first insn: btype must be 0. */
14043 tcg_debug_assert(s
->btype
== 0);
14047 s
->is_nonstreaming
= false;
14048 if (s
->sme_trap_nonstreaming
) {
14049 disas_sme_fa64(s
, insn
);
14052 if (!disas_a64(s
, insn
) &&
14053 !disas_sme(s
, insn
) &&
14054 !disas_sve(s
, insn
)) {
14055 disas_a64_legacy(s
, insn
);
14059 * After execution of most insns, btype is reset to 0.
14060 * Note that we set btype == -1 when the insn sets btype.
14062 if (s
->btype
> 0 && s
->base
.is_jmp
!= DISAS_NORETURN
) {
14067 static void aarch64_tr_tb_stop(DisasContextBase
*dcbase
, CPUState
*cpu
)
14069 DisasContext
*dc
= container_of(dcbase
, DisasContext
, base
);
14071 if (unlikely(dc
->ss_active
)) {
14072 /* Note that this means single stepping WFI doesn't halt the CPU.
14073 * For conditional branch insns this is harmless unreachable code as
14074 * gen_goto_tb() has already handled emitting the debug exception
14075 * (and thus a tb-jump is not possible when singlestepping).
14077 switch (dc
->base
.is_jmp
) {
14079 gen_a64_update_pc(dc
, 4);
14083 gen_step_complete_exception(dc
);
14085 case DISAS_NORETURN
:
14089 switch (dc
->base
.is_jmp
) {
14091 case DISAS_TOO_MANY
:
14092 gen_goto_tb(dc
, 1, 4);
14095 case DISAS_UPDATE_EXIT
:
14096 gen_a64_update_pc(dc
, 4);
14099 tcg_gen_exit_tb(NULL
, 0);
14101 case DISAS_UPDATE_NOCHAIN
:
14102 gen_a64_update_pc(dc
, 4);
14105 tcg_gen_lookup_and_goto_ptr();
14107 case DISAS_NORETURN
:
14111 gen_a64_update_pc(dc
, 4);
14112 gen_helper_wfe(cpu_env
);
14115 gen_a64_update_pc(dc
, 4);
14116 gen_helper_yield(cpu_env
);
14120 * This is a special case because we don't want to just halt
14121 * the CPU if trying to debug across a WFI.
14123 gen_a64_update_pc(dc
, 4);
14124 gen_helper_wfi(cpu_env
, tcg_constant_i32(4));
14126 * The helper doesn't necessarily throw an exception, but we
14127 * must go back to the main loop to check for interrupts anyway.
14129 tcg_gen_exit_tb(NULL
, 0);
14135 static void aarch64_tr_disas_log(const DisasContextBase
*dcbase
,
14136 CPUState
*cpu
, FILE *logfile
)
14138 DisasContext
*dc
= container_of(dcbase
, DisasContext
, base
);
14140 fprintf(logfile
, "IN: %s\n", lookup_symbol(dc
->base
.pc_first
));
14141 target_disas(logfile
, cpu
, dc
->base
.pc_first
, dc
->base
.tb
->size
);
14144 const TranslatorOps aarch64_translator_ops
= {
14145 .init_disas_context
= aarch64_tr_init_disas_context
,
14146 .tb_start
= aarch64_tr_tb_start
,
14147 .insn_start
= aarch64_tr_insn_start
,
14148 .translate_insn
= aarch64_tr_translate_insn
,
14149 .tb_stop
= aarch64_tr_tb_stop
,
14150 .disas_log
= aarch64_tr_disas_log
,