1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
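/*
 * Illustrative example: for an ARM-state instruction at 0x8000,
 * read_pc() returns 0x8008; for a Thumb instruction at the same
 * address it returns 0x8004. This matches the architectural rule
 * that reads of the PC observe the current instruction address
 * plus 8 (ARM) or plus 4 (Thumb).
 */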
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
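/*
 * Illustrative example: a Thumb LDR (literal) at pc_curr 0x1002 with
 * ofs 8 computes ((0x1002 + 4) & ~3) + 8 = 0x100c, i.e. the offset is
 * applied to the word-aligned PC, as the ALIGN(PC, 4) rule above
 * requires.
 */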
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 /* We have just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
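/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of
 * the two low halfwords and b holds the product of the two high
 * halfwords (operands sign-extended). The inputs are modified in
 * place.
 */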
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
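/*
 * Worked example: 0xaabbccdd becomes 0xbbaaddcc, i.e. the bytes
 * within each 16-bit half are swapped while the two halves stay in
 * place.
 */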
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 {
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
386
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
390
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
395
396 return ret;
397 }
398
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 {
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
404
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
408
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
413
414 return ret;
415 }
416
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
419 {
420 tcg_gen_rotri_i32(var, var, 16);
421 }
422
423 /* Dual 16-bit add. Result placed in dest; t0 and t1 are clobbered.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
428 */
429
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 {
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
440 }
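/*
 * Why this works: clearing bit 15 of both operands guarantees that the
 * low-halfword addition cannot carry into bit 16, so the high halfwords
 * add independently. XORing the saved (t0 ^ t1) & 0x8000 back in
 * restores the correct bit 15 of the low halfword, since with the carry
 * path cut that bit is just the carry-less sum of the original bit-15
 * values and the carry coming up from bits [14:0].
 */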
441
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
444 {
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
447 }
448
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 {
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
454 }
455
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
462 }
463
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 {
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
476 }
477
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 {
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
497 }
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
504 }
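/*
 * The add2 path computes t0 + CF and then adds t1, letting cpu_CF
 * accumulate the carry-out across both steps; the fallback widens
 * everything to 64 bits, adds, and then splits the result into
 * cpu_NF (low word) and cpu_CF (carry) with tcg_gen_extr_i64_i32.
 */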
505
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 {
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
519 }
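/*
 * Note that the ARM C flag after a subtraction is "NOT borrow":
 * C is set iff t0 >= t1 as unsigned values, which is why the carry
 * is computed with TCG_COND_GEU rather than by inverting a borrow.
 */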
520
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 {
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
528 }
529
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 { \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
544 }
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
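/*
 * The generated gen_shl()/gen_shr() implement the ARM semantics for
 * register-specified shifts: only the low 8 bits of t1 are used, and a
 * shift amount of 32 or more produces 0. Since TCG shifts are only
 * defined for counts 0..31, the movcond substitutes a zero source when
 * the amount exceeds 31 and then shifts by (amount & 31). gen_sar()
 * below instead clamps the amount to 31, because an arithmetic shift
 * right by 32 or more gives the same result as a shift by 31.
 */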
548
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
559 }
560
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
569 {
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
576 }
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
582 }
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
588 }
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
612 }
613 }
614 }
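/*
 * The shift == 0 special cases correspond to the immediate-shift
 * encodings: for LSR and ASR an immediate field of 0 encodes a shift
 * by 32, and for ROR it encodes RRX (rotate right by one through the
 * carry flag). LSL #0 is a plain move and needs no special handling.
 */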
615
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
618 {
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
625 }
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
639 }
640 }
641 tcg_temp_free_i32(shift);
642 }
643
644 /*
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
647 */
648 void arm_test_cc(DisasCompare *cmp, int cc)
649 {
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
653
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
660
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
666
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
672
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
678
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
689
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
698
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
710
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
718
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
722 }
723
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
726 }
727
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
732 }
733
734 void arm_free_cc(DisasCompare *cmp)
735 {
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
738 }
739 }
740
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 {
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
744 }
745
746 void arm_gen_test_cc(int cc, TCGLabel *label)
747 {
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
752 }
753
754 static inline void gen_set_condexec(DisasContext *s)
755 {
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
761 }
762 }
763
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 {
766 tcg_gen_movi_i32(cpu_R[15], val);
767 }
768
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
771 {
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
776 }
777
778 /*
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
791 *
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
799 */
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
801 {
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
804 */
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
810 }
811 #endif
812 }
813
814 static inline void gen_bx_excret_final_code(DisasContext *s)
815 {
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
819
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
826 }
827
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
835 }
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
843 *
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
847 */
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
850 }
851
852 static inline void gen_bxns(DisasContext *s, int rm)
853 {
854 TCGv_i32 var = load_reg(s, rm);
855
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
865 */
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
869 }
870
871 static inline void gen_blxns(DisasContext *s, int rm)
872 {
873 TCGv_i32 var = load_reg(s, rm);
874
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
878 */
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
883 }
884
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
889 {
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
894 }
895 }
896
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
902 {
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
907 }
908 }
909
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
915
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
922 */
923
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
928
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932 }
933 return addr;
934 }
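/*
 * Example of the BE32 address munging: with SCTLR.B set, a byte access
 * XORs the address with 3 and a halfword access XORs it with 2, which
 * reproduces the legacy BE32 byte-lane layout on top of a
 * little-endian memory image; word and larger accesses are left
 * unchanged.
 */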
935
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
938 {
939 TCGv addr;
940
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
944 }
945
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
949 }
950
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr;
955
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
959 }
960
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
964 }
965
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
969 { \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
971 }
972
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
976 { \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
978 }
979
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
981 {
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
985 }
986 }
987
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
990 {
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
995 }
996
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
999 {
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1005 {
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1016 }
1017 tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1022 {
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
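/*
 * For reference, DO_GEN_LD(8u, MO_UB) above expands to:
 *
 *   static inline void gen_aa32_ld8u(DisasContext *s, TCGv_i32 val,
 *                                    TCGv_i32 a32, int index)
 *   {
 *       gen_aa32_ld_i32(s, val, a32, index, MO_UB | s->be_data);
 *   }
 *
 * i.e. each helper is just gen_aa32_ld_i32/gen_aa32_st_i32 with the
 * access size ORed with the dynamic endianness in s->be_data.
 */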
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (i.e. before
1037 * the insn really executes).
1038 */
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1045 */
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1055 */
1056 TCGv_i32 tmp;
1057
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1064 }
1065
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1076 {
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085 TCGv_i32 tcg_syn;
1086
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1122 */
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1130 }
1131
1132 unallocated_encoding(s);
1133 }
1134
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1143 }
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1146 }
1147
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1158 }
1159 return ofs;
1160 }
1161 }
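/*
 * Illustrative examples: vfp_reg_offset(true, 1) is D1, which lives in
 * zregs[0].d[1]; vfp_reg_offset(false, 3) is S3, i.e. the
 * architecturally-upper 32 bits of that same double (the CPU_DoubleU
 * union hides the host byte order).
 */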
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 where 0 is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1175 */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1184 */
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1187 }
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
1190 }
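/*
 * Illustrative example: on a big-endian host, element 0 of a 16-bit
 * access has ofs = 0 ^ (8 - 2) = 6, which is where the least
 * significant halfword of the 8-byte unit sits in host memory; on a
 * little-endian host it is simply 0.
 */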
1191
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1215 }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1237 }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248 long offset = neon_element_offset(reg, ele, size);
1249
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1262 }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267 long offset = neon_element_offset(reg, ele, size);
1268
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1284 }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345 iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350 iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1483 {
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1487
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1490
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1514 }
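/*
 * The 8-bit immediate above is scaled by 4 when insn bit 8 is set
 * ((insn >> 7) & 2 selects a shift of 0 or 2), while bits 24, 23 and
 * 21 select pre- vs post-indexing, add vs subtract, and writeback, in
 * the usual ARM coprocessor load/store layout.
 */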
1515
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1520
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1526 }
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531 }
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (i.e. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1546
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1560 }
1561 return 0;
1562 }
1563
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1569 }
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584 }
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591 }
1592 }
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1596 }
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 }
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612 }
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620 }
1621 }
1622 }
1623 tcg_temp_free_i32(tmp);
1624 }
1625 tcg_temp_free_i32(addr);
1626 return 0;
1627 }
1628
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1631
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1673 }
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1746 }
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1768 }
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818 }
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1839 }
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859 }
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1900 }
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923 }
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932 }
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1938 }
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1955 }
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1976 }
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1992 }
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1998 }
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2004 }
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2025 }
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2040 }
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2046 }
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2052 }
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2074 }
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2104 }
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2135 }
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2166 }
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2182 }
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2193 }
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2210 }
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2221 }
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2249 }
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2268 }
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2275 }
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2282 }
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2285 }
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2318 }
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2349 }
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2404 }
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
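/* The 8-bit WSHUFH immediate is split across the encoding: the high
 * nibble comes from insn[23:20] and the low nibble from insn[3:0].
 */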
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2461 }
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2495 }
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2530 }
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2538 }
2539
2540 return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2549
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2555
2556 if (acc != 0)
2557 return 1;
2558
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2580 }
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2583
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2586 }
2587
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2593
2594 if (acc != 0)
2595 return 1;
2596
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
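/* The internal accumulator is 40 bits wide, so only bits [39:32] are
 * returned in rdhi.
 */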
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2605 }
2606 return 0;
2607 }
2608
2609 return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
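/* Worked example (illustrative): with aa32_simd_r32 present, VFP_DREG_D
 * on an insn with bits [15:12] == 0x5 and the D bit (bit 22) set yields
 * register 21 (0x5 | 0x10). Without the 32-register set, a set D bit
 * makes the enclosing decoder return 1, i.e. the insn UNDEFs.
 */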
2626
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2643 }
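/* Worked example: for var == 0x1234abcd, gen_neon_dup_low16() produces
 * 0xabcdabcd and gen_neon_dup_high16() produces 0x12341234.
 */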
2644
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
2654
2655 static void gen_goto_ptr(void)
2656 {
2657 tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2687
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
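/* gen_mulxy() is the 16x16->32 signed multiply used by the halfword
 * multiplies (SMULxy/SMLAxy): x and y select the top (1) or bottom (0)
 * halfword of t0 and t1 respectively, e.g. x == 1, y == 0 multiplies
 * t0[31:16] by the sign-extended t1[15:0].
 */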
2700
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2705
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
2718
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2726
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
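/* Worked example: an MSR with flags == 0x9 (the c and f fields) yields
 * a raw mask of 0xff0000ff before the validity, execution-state and
 * privilege filtering above.
 */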
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2742
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2767 {
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2782 *
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
2787 int exc_target = default_exception_el(s);
2788
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2796
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2800
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2873
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2905
2906 return true;
2907
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2919
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2922 }
2923
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2941
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2944 }
2945
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3001 }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3011 }
3012 }
3013
3014 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3015 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3016 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3017 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3018 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
3019
3020 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3021 switch ((size << 1) | u) { \
3022 case 0: \
3023 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 1: \
3026 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 2: \
3029 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 3: \
3032 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 case 4: \
3035 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3036 break; \
3037 case 5: \
3038 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3039 break; \
3040 default: return 1; \
3041 }} while (0)
3042
3043 #define GEN_NEON_INTEGER_OP(name) do { \
3044 switch ((size << 1) | u) { \
3045 case 0: \
3046 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3047 break; \
3048 case 1: \
3049 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3050 break; \
3051 case 2: \
3052 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3053 break; \
3054 case 3: \
3055 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3056 break; \
3057 case 4: \
3058 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3059 break; \
3060 case 5: \
3061 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3062 break; \
3063 default: return 1; \
3064 }} while (0)
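/* Both macros select a helper by pasting the size/signedness suffix,
 * e.g. GEN_NEON_INTEGER_OP(hadd) with size == 1 and u == 1 expands to
 * gen_helper_neon_hadd_u16(tmp, tmp, tmp2).
 */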
3065
3066 static TCGv_i32 neon_load_scratch(int scratch)
3067 {
3068 TCGv_i32 tmp = tcg_temp_new_i32();
3069 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 return tmp;
3071 }
3072
3073 static void neon_store_scratch(int scratch, TCGv_i32 var)
3074 {
3075 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3076 tcg_temp_free_i32(var);
3077 }
3078
3079 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3080 {
3081 TCGv_i32 tmp;
3082 if (size == 1) {
3083 tmp = neon_load_reg(reg & 7, reg >> 4);
3084 if (reg & 8) {
3085 gen_neon_dup_high16(tmp);
3086 } else {
3087 gen_neon_dup_low16(tmp);
3088 }
3089 } else {
3090 tmp = neon_load_reg(reg & 15, reg >> 4);
3091 }
3092 return tmp;
3093 }
3094
3095 static int gen_neon_unzip(int rd, int rm, int size, int q)
3096 {
3097 TCGv_ptr pd, pm;
3098
3099 if (!q && size == 2) {
3100 return 1;
3101 }
3102 pd = vfp_reg_ptr(true, rd);
3103 pm = vfp_reg_ptr(true, rm);
3104 if (q) {
3105 switch (size) {
3106 case 0:
3107 gen_helper_neon_qunzip8(pd, pm);
3108 break;
3109 case 1:
3110 gen_helper_neon_qunzip16(pd, pm);
3111 break;
3112 case 2:
3113 gen_helper_neon_qunzip32(pd, pm);
3114 break;
3115 default:
3116 abort();
3117 }
3118 } else {
3119 switch (size) {
3120 case 0:
3121 gen_helper_neon_unzip8(pd, pm);
3122 break;
3123 case 1:
3124 gen_helper_neon_unzip16(pd, pm);
3125 break;
3126 default:
3127 abort();
3128 }
3129 }
3130 tcg_temp_free_ptr(pd);
3131 tcg_temp_free_ptr(pm);
3132 return 0;
3133 }
3134
3135 static int gen_neon_zip(int rd, int rm, int size, int q)
3136 {
3137 TCGv_ptr pd, pm;
3138
3139 if (!q && size == 2) {
3140 return 1;
3141 }
3142 pd = vfp_reg_ptr(true, rd);
3143 pm = vfp_reg_ptr(true, rm);
3144 if (q) {
3145 switch (size) {
3146 case 0:
3147 gen_helper_neon_qzip8(pd, pm);
3148 break;
3149 case 1:
3150 gen_helper_neon_qzip16(pd, pm);
3151 break;
3152 case 2:
3153 gen_helper_neon_qzip32(pd, pm);
3154 break;
3155 default:
3156 abort();
3157 }
3158 } else {
3159 switch (size) {
3160 case 0:
3161 gen_helper_neon_zip8(pd, pm);
3162 break;
3163 case 1:
3164 gen_helper_neon_zip16(pd, pm);
3165 break;
3166 default:
3167 abort();
3168 }
3169 }
3170 tcg_temp_free_ptr(pd);
3171 tcg_temp_free_ptr(pm);
3172 return 0;
3173 }
3174
3175 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3176 {
3177 TCGv_i32 rd, tmp;
3178
3179 rd = tcg_temp_new_i32();
3180 tmp = tcg_temp_new_i32();
3181
3182 tcg_gen_shli_i32(rd, t0, 8);
3183 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3184 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3185 tcg_gen_or_i32(rd, rd, tmp);
3186
3187 tcg_gen_shri_i32(t1, t1, 8);
3188 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3189 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3190 tcg_gen_or_i32(t1, t1, tmp);
3191 tcg_gen_mov_i32(t0, rd);
3192
3193 tcg_temp_free_i32(tmp);
3194 tcg_temp_free_i32(rd);
3195 }
3196
3197 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3198 {
3199 TCGv_i32 rd, tmp;
3200
3201 rd = tcg_temp_new_i32();
3202 tmp = tcg_temp_new_i32();
3203
3204 tcg_gen_shli_i32(rd, t0, 16);
3205 tcg_gen_andi_i32(tmp, t1, 0xffff);
3206 tcg_gen_or_i32(rd, rd, tmp);
3207 tcg_gen_shri_i32(t1, t1, 16);
3208 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3209 tcg_gen_or_i32(t1, t1, tmp);
3210 tcg_gen_mov_i32(t0, rd);
3211
3212 tcg_temp_free_i32(tmp);
3213 tcg_temp_free_i32(rd);
3214 }
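/* Worked example for gen_neon_trn_u16(): t0 == 0xAAAABBBB and
 * t1 == 0xCCCCDDDD become t0 == 0xBBBBDDDD and t1 == 0xAAAACCCC.
 */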
3215
3216 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3217 {
3218 switch (size) {
3219 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3220 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3221 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3222 default: abort();
3223 }
3224 }
3225
3226 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3227 {
3228 switch (size) {
3229 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3230 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3231 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3232 default: abort();
3233 }
3234 }
3235
3236 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3237 {
3238 switch (size) {
3239 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3240 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3241 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3242 default: abort();
3243 }
3244 }
3245
3246 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3247 {
3248 switch (size) {
3249 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3250 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3251 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3252 default: abort();
3253 }
3254 }
3255
3256 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3257 int q, int u)
3258 {
3259 if (q) {
3260 if (u) {
3261 switch (size) {
3262 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3264 default: abort();
3265 }
3266 } else {
3267 switch (size) {
3268 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3269 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3270 default: abort();
3271 }
3272 }
3273 } else {
3274 if (u) {
3275 switch (size) {
3276 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3277 case 2: gen_ushl_i32(var, var, shift); break;
3278 default: abort();
3279 }
3280 } else {
3281 switch (size) {
3282 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3283 case 2: gen_sshl_i32(var, var, shift); break;
3284 default: abort();
3285 }
3286 }
3287 }
3288 }
3289
3290 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3291 {
3292 if (u) {
3293 switch (size) {
3294 case 0: gen_helper_neon_widen_u8(dest, src); break;
3295 case 1: gen_helper_neon_widen_u16(dest, src); break;
3296 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3297 default: abort();
3298 }
3299 } else {
3300 switch (size) {
3301 case 0: gen_helper_neon_widen_s8(dest, src); break;
3302 case 1: gen_helper_neon_widen_s16(dest, src); break;
3303 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3304 default: abort();
3305 }
3306 }
3307 tcg_temp_free_i32(src);
3308 }
3309
3310 static inline void gen_neon_addl(int size)
3311 {
3312 switch (size) {
3313 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3314 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3315 case 2: tcg_gen_add_i64(CPU_V001); break;
3316 default: abort();
3317 }
3318 }
3319
3320 static inline void gen_neon_subl(int size)
3321 {
3322 switch (size) {
3323 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3324 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3325 case 2: tcg_gen_sub_i64(CPU_V001); break;
3326 default: abort();
3327 }
3328 }
3329
3330 static inline void gen_neon_negl(TCGv_i64 var, int size)
3331 {
3332 switch (size) {
3333 case 0: gen_helper_neon_negl_u16(var, var); break;
3334 case 1: gen_helper_neon_negl_u32(var, var); break;
3335 case 2:
3336 tcg_gen_neg_i64(var, var);
3337 break;
3338 default: abort();
3339 }
3340 }
3341
3342 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3343 {
3344 switch (size) {
3345 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3346 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3347 default: abort();
3348 }
3349 }
3350
3351 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3352 int size, int u)
3353 {
3354 TCGv_i64 tmp;
3355
3356 switch ((size << 1) | u) {
3357 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3358 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3359 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3360 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3361 case 4:
3362 tmp = gen_muls_i64_i32(a, b);
3363 tcg_gen_mov_i64(dest, tmp);
3364 tcg_temp_free_i64(tmp);
3365 break;
3366 case 5:
3367 tmp = gen_mulu_i64_i32(a, b);
3368 tcg_gen_mov_i64(dest, tmp);
3369 tcg_temp_free_i64(tmp);
3370 break;
3371 default: abort();
3372 }
3373
3374 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3375 Don't forget to clean them now. */
3376 if (size < 2) {
3377 tcg_temp_free_i32(a);
3378 tcg_temp_free_i32(b);
3379 }
3380 }
3381
3382 static void gen_neon_narrow_op(int op, int u, int size,
3383 TCGv_i32 dest, TCGv_i64 src)
3384 {
3385 if (op) {
3386 if (u) {
3387 gen_neon_unarrow_sats(size, dest, src);
3388 } else {
3389 gen_neon_narrow(size, dest, src);
3390 }
3391 } else {
3392 if (u) {
3393 gen_neon_narrow_satu(size, dest, src);
3394 } else {
3395 gen_neon_narrow_sats(size, dest, src);
3396 }
3397 }
3398 }
3399
3400 /* Symbolic constants for op fields for Neon 3-register same-length.
3401 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3402 * table A7-9.
3403 */
3404 #define NEON_3R_VHADD 0
3405 #define NEON_3R_VQADD 1
3406 #define NEON_3R_VRHADD 2
3407 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3408 #define NEON_3R_VHSUB 4
3409 #define NEON_3R_VQSUB 5
3410 #define NEON_3R_VCGT 6
3411 #define NEON_3R_VCGE 7
3412 #define NEON_3R_VSHL 8
3413 #define NEON_3R_VQSHL 9
3414 #define NEON_3R_VRSHL 10
3415 #define NEON_3R_VQRSHL 11
3416 #define NEON_3R_VMAX 12
3417 #define NEON_3R_VMIN 13
3418 #define NEON_3R_VABD 14
3419 #define NEON_3R_VABA 15
3420 #define NEON_3R_VADD_VSUB 16
3421 #define NEON_3R_VTST_VCEQ 17
3422 #define NEON_3R_VML 18 /* VMLA, VMLS */
3423 #define NEON_3R_VMUL 19
3424 #define NEON_3R_VPMAX 20
3425 #define NEON_3R_VPMIN 21
3426 #define NEON_3R_VQDMULH_VQRDMULH 22
3427 #define NEON_3R_VPADD_VQRDMLAH 23
3428 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3429 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3430 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3431 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3432 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3433 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3434 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3435 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3436
3437 static const uint8_t neon_3r_sizes[] = {
3438 [NEON_3R_VHADD] = 0x7,
3439 [NEON_3R_VQADD] = 0xf,
3440 [NEON_3R_VRHADD] = 0x7,
3441 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3442 [NEON_3R_VHSUB] = 0x7,
3443 [NEON_3R_VQSUB] = 0xf,
3444 [NEON_3R_VCGT] = 0x7,
3445 [NEON_3R_VCGE] = 0x7,
3446 [NEON_3R_VSHL] = 0xf,
3447 [NEON_3R_VQSHL] = 0xf,
3448 [NEON_3R_VRSHL] = 0xf,
3449 [NEON_3R_VQRSHL] = 0xf,
3450 [NEON_3R_VMAX] = 0x7,
3451 [NEON_3R_VMIN] = 0x7,
3452 [NEON_3R_VABD] = 0x7,
3453 [NEON_3R_VABA] = 0x7,
3454 [NEON_3R_VADD_VSUB] = 0xf,
3455 [NEON_3R_VTST_VCEQ] = 0x7,
3456 [NEON_3R_VML] = 0x7,
3457 [NEON_3R_VMUL] = 0x7,
3458 [NEON_3R_VPMAX] = 0x7,
3459 [NEON_3R_VPMIN] = 0x7,
3460 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3461 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3462 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3463 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3464 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3465 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3466 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3467 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3468 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3469 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3470 };
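/* The table is consulted as a bitmask of permitted size encodings; the
 * Neon decoder is expected to do something like
 *   if (!(neon_3r_sizes[op] & (1 << size))) { ...UNDEF... }
 * (illustrative sketch; the actual check lives in the decode path).
 */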
3471
3472 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3473 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3474 * table A7-13.
3475 */
3476 #define NEON_2RM_VREV64 0
3477 #define NEON_2RM_VREV32 1
3478 #define NEON_2RM_VREV16 2
3479 #define NEON_2RM_VPADDL 4
3480 #define NEON_2RM_VPADDL_U 5
3481 #define NEON_2RM_AESE 6 /* Includes AESD */
3482 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3483 #define NEON_2RM_VCLS 8
3484 #define NEON_2RM_VCLZ 9
3485 #define NEON_2RM_VCNT 10
3486 #define NEON_2RM_VMVN 11
3487 #define NEON_2RM_VPADAL 12
3488 #define NEON_2RM_VPADAL_U 13
3489 #define NEON_2RM_VQABS 14
3490 #define NEON_2RM_VQNEG 15
3491 #define NEON_2RM_VCGT0 16
3492 #define NEON_2RM_VCGE0 17
3493 #define NEON_2RM_VCEQ0 18
3494 #define NEON_2RM_VCLE0 19
3495 #define NEON_2RM_VCLT0 20
3496 #define NEON_2RM_SHA1H 21
3497 #define NEON_2RM_VABS 22
3498 #define NEON_2RM_VNEG 23
3499 #define NEON_2RM_VCGT0_F 24
3500 #define NEON_2RM_VCGE0_F 25
3501 #define NEON_2RM_VCEQ0_F 26
3502 #define NEON_2RM_VCLE0_F 27
3503 #define NEON_2RM_VCLT0_F 28
3504 #define NEON_2RM_VABS_F 30
3505 #define NEON_2RM_VNEG_F 31
3506 #define NEON_2RM_VSWP 32
3507 #define NEON_2RM_VTRN 33
3508 #define NEON_2RM_VUZP 34
3509 #define NEON_2RM_VZIP 35
3510 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3511 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3512 #define NEON_2RM_VSHLL 38
3513 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3514 #define NEON_2RM_VRINTN 40
3515 #define NEON_2RM_VRINTX 41
3516 #define NEON_2RM_VRINTA 42
3517 #define NEON_2RM_VRINTZ 43
3518 #define NEON_2RM_VCVT_F16_F32 44
3519 #define NEON_2RM_VRINTM 45
3520 #define NEON_2RM_VCVT_F32_F16 46
3521 #define NEON_2RM_VRINTP 47
3522 #define NEON_2RM_VCVTAU 48
3523 #define NEON_2RM_VCVTAS 49
3524 #define NEON_2RM_VCVTNU 50
3525 #define NEON_2RM_VCVTNS 51
3526 #define NEON_2RM_VCVTPU 52
3527 #define NEON_2RM_VCVTPS 53
3528 #define NEON_2RM_VCVTMU 54
3529 #define NEON_2RM_VCVTMS 55
3530 #define NEON_2RM_VRECPE 56
3531 #define NEON_2RM_VRSQRTE 57
3532 #define NEON_2RM_VRECPE_F 58
3533 #define NEON_2RM_VRSQRTE_F 59
3534 #define NEON_2RM_VCVT_FS 60
3535 #define NEON_2RM_VCVT_FU 61
3536 #define NEON_2RM_VCVT_SF 62
3537 #define NEON_2RM_VCVT_UF 63
3538
3539 static bool neon_2rm_is_v8_op(int op)
3540 {
3541 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3542 switch (op) {
3543 case NEON_2RM_VRINTN:
3544 case NEON_2RM_VRINTA:
3545 case NEON_2RM_VRINTM:
3546 case NEON_2RM_VRINTP:
3547 case NEON_2RM_VRINTZ:
3548 case NEON_2RM_VRINTX:
3549 case NEON_2RM_VCVTAU:
3550 case NEON_2RM_VCVTAS:
3551 case NEON_2RM_VCVTNU:
3552 case NEON_2RM_VCVTNS:
3553 case NEON_2RM_VCVTPU:
3554 case NEON_2RM_VCVTPS:
3555 case NEON_2RM_VCVTMU:
3556 case NEON_2RM_VCVTMS:
3557 return true;
3558 default:
3559 return false;
3560 }
3561 }
3562
3563 /* Each entry in this array has bit n set if the insn allows
3564 * size value n (otherwise it will UNDEF). Since unallocated
3565 * op values will have no bits set they always UNDEF.
3566 */
3567 static const uint8_t neon_2rm_sizes[] = {
3568 [NEON_2RM_VREV64] = 0x7,
3569 [NEON_2RM_VREV32] = 0x3,
3570 [NEON_2RM_VREV16] = 0x1,
3571 [NEON_2RM_VPADDL] = 0x7,
3572 [NEON_2RM_VPADDL_U] = 0x7,
3573 [NEON_2RM_AESE] = 0x1,
3574 [NEON_2RM_AESMC] = 0x1,
3575 [NEON_2RM_VCLS] = 0x7,
3576 [NEON_2RM_VCLZ] = 0x7,
3577 [NEON_2RM_VCNT] = 0x1,
3578 [NEON_2RM_VMVN] = 0x1,
3579 [NEON_2RM_VPADAL] = 0x7,
3580 [NEON_2RM_VPADAL_U] = 0x7,
3581 [NEON_2RM_VQABS] = 0x7,
3582 [NEON_2RM_VQNEG] = 0x7,
3583 [NEON_2RM_VCGT0] = 0x7,
3584 [NEON_2RM_VCGE0] = 0x7,
3585 [NEON_2RM_VCEQ0] = 0x7,
3586 [NEON_2RM_VCLE0] = 0x7,
3587 [NEON_2RM_VCLT0] = 0x7,
3588 [NEON_2RM_SHA1H] = 0x4,
3589 [NEON_2RM_VABS] = 0x7,
3590 [NEON_2RM_VNEG] = 0x7,
3591 [NEON_2RM_VCGT0_F] = 0x4,
3592 [NEON_2RM_VCGE0_F] = 0x4,
3593 [NEON_2RM_VCEQ0_F] = 0x4,
3594 [NEON_2RM_VCLE0_F] = 0x4,
3595 [NEON_2RM_VCLT0_F] = 0x4,
3596 [NEON_2RM_VABS_F] = 0x4,
3597 [NEON_2RM_VNEG_F] = 0x4,
3598 [NEON_2RM_VSWP] = 0x1,
3599 [NEON_2RM_VTRN] = 0x7,
3600 [NEON_2RM_VUZP] = 0x7,
3601 [NEON_2RM_VZIP] = 0x7,
3602 [NEON_2RM_VMOVN] = 0x7,
3603 [NEON_2RM_VQMOVN] = 0x7,
3604 [NEON_2RM_VSHLL] = 0x7,
3605 [NEON_2RM_SHA1SU1] = 0x4,
3606 [NEON_2RM_VRINTN] = 0x4,
3607 [NEON_2RM_VRINTX] = 0x4,
3608 [NEON_2RM_VRINTA] = 0x4,
3609 [NEON_2RM_VRINTZ] = 0x4,
3610 [NEON_2RM_VCVT_F16_F32] = 0x2,
3611 [NEON_2RM_VRINTM] = 0x4,
3612 [NEON_2RM_VCVT_F32_F16] = 0x2,
3613 [NEON_2RM_VRINTP] = 0x4,
3614 [NEON_2RM_VCVTAU] = 0x4,
3615 [NEON_2RM_VCVTAS] = 0x4,
3616 [NEON_2RM_VCVTNU] = 0x4,
3617 [NEON_2RM_VCVTNS] = 0x4,
3618 [NEON_2RM_VCVTPU] = 0x4,
3619 [NEON_2RM_VCVTPS] = 0x4,
3620 [NEON_2RM_VCVTMU] = 0x4,
3621 [NEON_2RM_VCVTMS] = 0x4,
3622 [NEON_2RM_VRECPE] = 0x4,
3623 [NEON_2RM_VRSQRTE] = 0x4,
3624 [NEON_2RM_VRECPE_F] = 0x4,
3625 [NEON_2RM_VRSQRTE_F] = 0x4,
3626 [NEON_2RM_VCVT_FS] = 0x4,
3627 [NEON_2RM_VCVT_FU] = 0x4,
3628 [NEON_2RM_VCVT_SF] = 0x4,
3629 [NEON_2RM_VCVT_UF] = 0x4,
3630 };
3631
3632
3633 /* Expand v8.1 simd helper. */
3634 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3635 int q, int rd, int rn, int rm)
3636 {
3637 if (dc_isar_feature(aa32_rdm, s)) {
3638 int opr_sz = (1 + q) * 8;
3639 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3640 vfp_reg_offset(1, rn),
3641 vfp_reg_offset(1, rm), cpu_env,
3642 opr_sz, opr_sz, 0, fn);
3643 return 0;
3644 }
3645 return 1;
3646 }
3647
3648 static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
3649 {
3650 tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
3651 tcg_gen_neg_i32(d, d);
3652 }
3653
3654 static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
3655 {
3656 tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
3657 tcg_gen_neg_i64(d, d);
3658 }
3659
3660 static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3661 {
3662 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3663 tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
3664 tcg_temp_free_vec(zero);
3665 }
3666
3667 static const TCGOpcode vecop_list_cmp[] = {
3668 INDEX_op_cmp_vec, 0
3669 };
3670
3671 const GVecGen2 ceq0_op[4] = {
3672 { .fno = gen_helper_gvec_ceq0_b,
3673 .fniv = gen_ceq0_vec,
3674 .opt_opc = vecop_list_cmp,
3675 .vece = MO_8 },
3676 { .fno = gen_helper_gvec_ceq0_h,
3677 .fniv = gen_ceq0_vec,
3678 .opt_opc = vecop_list_cmp,
3679 .vece = MO_16 },
3680 { .fni4 = gen_ceq0_i32,
3681 .fniv = gen_ceq0_vec,
3682 .opt_opc = vecop_list_cmp,
3683 .vece = MO_32 },
3684 { .fni8 = gen_ceq0_i64,
3685 .fniv = gen_ceq0_vec,
3686 .opt_opc = vecop_list_cmp,
3687 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3688 .vece = MO_64 },
3689 };
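/* This and the following c{le,ge,lt,gt}0_op tables implement the Neon
 * compare-against-zero operations: setcond produces 0/1 and the
 * negation turns it into the 0/all-ones mask the architecture requires,
 * e.g. for the i32 case a == 0 gives d == 0xffffffff.
 */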
3690
3691 static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
3692 {
3693 tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
3694 tcg_gen_neg_i32(d, d);
3695 }
3696
3697 static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
3698 {
3699 tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
3700 tcg_gen_neg_i64(d, d);
3701 }
3702
3703 static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3704 {
3705 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3706 tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
3707 tcg_temp_free_vec(zero);
3708 }
3709
3710 const GVecGen2 cle0_op[4] = {
3711 { .fno = gen_helper_gvec_cle0_b,
3712 .fniv = gen_cle0_vec,
3713 .opt_opc = vecop_list_cmp,
3714 .vece = MO_8 },
3715 { .fno = gen_helper_gvec_cle0_h,
3716 .fniv = gen_cle0_vec,
3717 .opt_opc = vecop_list_cmp,
3718 .vece = MO_16 },
3719 { .fni4 = gen_cle0_i32,
3720 .fniv = gen_cle0_vec,
3721 .opt_opc = vecop_list_cmp,
3722 .vece = MO_32 },
3723 { .fni8 = gen_cle0_i64,
3724 .fniv = gen_cle0_vec,
3725 .opt_opc = vecop_list_cmp,
3726 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3727 .vece = MO_64 },
3728 };
3729
3730 static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
3731 {
3732 tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
3733 tcg_gen_neg_i32(d, d);
3734 }
3735
3736 static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
3737 {
3738 tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
3739 tcg_gen_neg_i64(d, d);
3740 }
3741
3742 static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3743 {
3744 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3745 tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
3746 tcg_temp_free_vec(zero);
3747 }
3748
3749 const GVecGen2 cge0_op[4] = {
3750 { .fno = gen_helper_gvec_cge0_b,
3751 .fniv = gen_cge0_vec,
3752 .opt_opc = vecop_list_cmp,
3753 .vece = MO_8 },
3754 { .fno = gen_helper_gvec_cge0_h,
3755 .fniv = gen_cge0_vec,
3756 .opt_opc = vecop_list_cmp,
3757 .vece = MO_16 },
3758 { .fni4 = gen_cge0_i32,
3759 .fniv = gen_cge0_vec,
3760 .opt_opc = vecop_list_cmp,
3761 .vece = MO_32 },
3762 { .fni8 = gen_cge0_i64,
3763 .fniv = gen_cge0_vec,
3764 .opt_opc = vecop_list_cmp,
3765 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3766 .vece = MO_64 },
3767 };
3768
3769 static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
3770 {
3771 tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
3772 tcg_gen_neg_i32(d, d);
3773 }
3774
3775 static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
3776 {
3777 tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
3778 tcg_gen_neg_i64(d, d);
3779 }
3780
3781 static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3782 {
3783 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3784 tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
3785 tcg_temp_free_vec(zero);
3786 }
3787
3788 const GVecGen2 clt0_op[4] = {
3789 { .fno = gen_helper_gvec_clt0_b,
3790 .fniv = gen_clt0_vec,
3791 .opt_opc = vecop_list_cmp,
3792 .vece = MO_8 },
3793 { .fno = gen_helper_gvec_clt0_h,
3794 .fniv = gen_clt0_vec,
3795 .opt_opc = vecop_list_cmp,
3796 .vece = MO_16 },
3797 { .fni4 = gen_clt0_i32,
3798 .fniv = gen_clt0_vec,
3799 .opt_opc = vecop_list_cmp,
3800 .vece = MO_32 },
3801 { .fni8 = gen_clt0_i64,
3802 .fniv = gen_clt0_vec,
3803 .opt_opc = vecop_list_cmp,
3804 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3805 .vece = MO_64 },
3806 };
3807
3808 static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
3809 {
3810 tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
3811 tcg_gen_neg_i32(d, d);
3812 }
3813
3814 static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
3815 {
3816 tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
3817 tcg_gen_neg_i64(d, d);
3818 }
3819
3820 static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
3821 {
3822 TCGv_vec zero = tcg_const_zeros_vec_matching(d);
3823 tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
3824 tcg_temp_free_vec(zero);
3825 }
3826
3827 const GVecGen2 cgt0_op[4] = {
3828 { .fno = gen_helper_gvec_cgt0_b,
3829 .fniv = gen_cgt0_vec,
3830 .opt_opc = vecop_list_cmp,
3831 .vece = MO_8 },
3832 { .fno = gen_helper_gvec_cgt0_h,
3833 .fniv = gen_cgt0_vec,
3834 .opt_opc = vecop_list_cmp,
3835 .vece = MO_16 },
3836 { .fni4 = gen_cgt0_i32,
3837 .fniv = gen_cgt0_vec,
3838 .opt_opc = vecop_list_cmp,
3839 .vece = MO_32 },
3840 { .fni8 = gen_cgt0_i64,
3841 .fniv = gen_cgt0_vec,
3842 .opt_opc = vecop_list_cmp,
3843 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3844 .vece = MO_64 },
3845 };
3846
3847 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3848 {
3849 tcg_gen_vec_sar8i_i64(a, a, shift);
3850 tcg_gen_vec_add8_i64(d, d, a);
3851 }
3852
3853 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3854 {
3855 tcg_gen_vec_sar16i_i64(a, a, shift);
3856 tcg_gen_vec_add16_i64(d, d, a);
3857 }
3858
3859 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3860 {
3861 tcg_gen_sari_i32(a, a, shift);
3862 tcg_gen_add_i32(d, d, a);
3863 }
3864
3865 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3866 {
3867 tcg_gen_sari_i64(a, a, shift);
3868 tcg_gen_add_i64(d, d, a);
3869 }
3870
3871 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3872 {
3873 tcg_gen_sari_vec(vece, a, a, sh);
3874 tcg_gen_add_vec(vece, d, d, a);
3875 }
3876
3877 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3878 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3879 {
3880 static const TCGOpcode vecop_list[] = {
3881 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3882 };
3883 static const GVecGen2i ops[4] = {
3884 { .fni8 = gen_ssra8_i64,
3885 .fniv = gen_ssra_vec,
3886 .fno = gen_helper_gvec_ssra_b,
3887 .load_dest = true,
3888 .opt_opc = vecop_list,
3889 .vece = MO_8 },
3890 { .fni8 = gen_ssra16_i64,
3891 .fniv = gen_ssra_vec,
3892 .fno = gen_helper_gvec_ssra_h,
3893 .load_dest = true,
3894 .opt_opc = vecop_list,
3895 .vece = MO_16 },
3896 { .fni4 = gen_ssra32_i32,
3897 .fniv = gen_ssra_vec,
3898 .fno = gen_helper_gvec_ssra_s,
3899 .load_dest = true,
3900 .opt_opc = vecop_list,
3901 .vece = MO_32 },
3902 { .fni8 = gen_ssra64_i64,
3903 .fniv = gen_ssra_vec,
3904 .fno = gen_helper_gvec_ssra_d,
3905 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3906 .opt_opc = vecop_list,
3907 .load_dest = true,
3908 .vece = MO_64 },
3909 };
3910
3911 /* tszimm encoding produces immediates in the range [1..esize]. */
3912 tcg_debug_assert(shift > 0);
3913 tcg_debug_assert(shift <= (8 << vece));
3914
3915 /*
3916 * Shifts larger than the element size are architecturally valid.
3917 * Signed results in all sign bits.
3918 */
3919 shift = MIN(shift, (8 << vece) - 1);
3920 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3921 }
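
/*
 * Minimal scalar sketch of the clamp above for an 8-bit lane (hypothetical
 * reference name, not used by the translator; assumes the usual arithmetic
 * right shift of negative values, as QEMU does elsewhere): an architectural
 * SSRA shift of 8 adds -1 for negative inputs and 0 otherwise, which is
 * exactly what a shift by 7 produces.
 */
static inline int8_t ssra8_ref(int8_t acc, int8_t x, int sh) /* 1 <= sh <= 8 */
{
    int clamped = sh > 7 ? 7 : sh;           /* MIN(shift, esize - 1) */
    return (int8_t)(acc + (x >> clamped));
}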
3922
3923 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3924 {
3925 tcg_gen_vec_shr8i_i64(a, a, shift);
3926 tcg_gen_vec_add8_i64(d, d, a);
3927 }
3928
3929 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3930 {
3931 tcg_gen_vec_shr16i_i64(a, a, shift);
3932 tcg_gen_vec_add16_i64(d, d, a);
3933 }
3934
3935 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3936 {
3937 tcg_gen_shri_i32(a, a, shift);
3938 tcg_gen_add_i32(d, d, a);
3939 }
3940
3941 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3942 {
3943 tcg_gen_shri_i64(a, a, shift);
3944 tcg_gen_add_i64(d, d, a);
3945 }
3946
3947 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3948 {
3949 tcg_gen_shri_vec(vece, a, a, sh);
3950 tcg_gen_add_vec(vece, d, d, a);
3951 }
3952
3953 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3954 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3955 {
3956 static const TCGOpcode vecop_list[] = {
3957 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3958 };
3959 static const GVecGen2i ops[4] = {
3960 { .fni8 = gen_usra8_i64,
3961 .fniv = gen_usra_vec,
3962 .fno = gen_helper_gvec_usra_b,
3963 .load_dest = true,
3964 .opt_opc = vecop_list,
3965 .vece = MO_8, },
3966 { .fni8 = gen_usra16_i64,
3967 .fniv = gen_usra_vec,
3968 .fno = gen_helper_gvec_usra_h,
3969 .load_dest = true,
3970 .opt_opc = vecop_list,
3971 .vece = MO_16, },
3972 { .fni4 = gen_usra32_i32,
3973 .fniv = gen_usra_vec,
3974 .fno = gen_helper_gvec_usra_s,
3975 .load_dest = true,
3976 .opt_opc = vecop_list,
3977 .vece = MO_32, },
3978 { .fni8 = gen_usra64_i64,
3979 .fniv = gen_usra_vec,
3980 .fno = gen_helper_gvec_usra_d,
3981 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3982 .load_dest = true,
3983 .opt_opc = vecop_list,
3984 .vece = MO_64, },
3985 };
3986
3987 /* tszimm encoding produces immediates in the range [1..esize]. */
3988 tcg_debug_assert(shift > 0);
3989 tcg_debug_assert(shift <= (8 << vece));
3990
3991 /*
3992 * Shifts larger than the element size are architecturally valid.
3993 * Unsigned results in all zeros as input to accumulate: nop.
3994 */
3995 if (shift < (8 << vece)) {
3996 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3997 } else {
3998 /* Nop, but we do need to clear the tail. */
3999 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4000 }
4001 }
4002
4003 /*
4004 * Shift one less than the requested amount, and the low bit is
4005 * the rounding bit. For the 8 and 16-bit operations, because we
4006 * mask the low bit, we can perform a normal integer shift instead
4007 * of a vector shift.
4008 */
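
/*
 * Minimal scalar sketch of the identity used by the helpers below
 * (hypothetical name, illustrative only; assumes <stdint.h> types and
 * arithmetic right shift of negative values): the rounded shift
 * (x + (1 << (sh - 1))) >> sh equals the plain shift plus the last bit
 * shifted out, i.e. "shift one less and keep the low bit".
 */
static inline int32_t srshr32_ref(int32_t x, int sh) /* 1 <= sh <= 31 */
{
    int32_t rounding_bit = (x >> (sh - 1)) & 1;
    return (x >> sh) + rounding_bit;
}
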
4009 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4010 {
4011 TCGv_i64 t = tcg_temp_new_i64();
4012
4013 tcg_gen_shri_i64(t, a, sh - 1);
4014 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4015 tcg_gen_vec_sar8i_i64(d, a, sh);
4016 tcg_gen_vec_add8_i64(d, d, t);
4017 tcg_temp_free_i64(t);
4018 }
4019
4020 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4021 {
4022 TCGv_i64 t = tcg_temp_new_i64();
4023
4024 tcg_gen_shri_i64(t, a, sh - 1);
4025 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4026 tcg_gen_vec_sar16i_i64(d, a, sh);
4027 tcg_gen_vec_add16_i64(d, d, t);
4028 tcg_temp_free_i64(t);
4029 }
4030
4031 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4032 {
4033 TCGv_i32 t = tcg_temp_new_i32();
4034
4035 tcg_gen_extract_i32(t, a, sh - 1, 1);
4036 tcg_gen_sari_i32(d, a, sh);
4037 tcg_gen_add_i32(d, d, t);
4038 tcg_temp_free_i32(t);
4039 }
4040
4041 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4042 {
4043 TCGv_i64 t = tcg_temp_new_i64();
4044
4045 tcg_gen_extract_i64(t, a, sh - 1, 1);
4046 tcg_gen_sari_i64(d, a, sh);
4047 tcg_gen_add_i64(d, d, t);
4048 tcg_temp_free_i64(t);
4049 }
4050
4051 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4052 {
4053 TCGv_vec t = tcg_temp_new_vec_matching(d);
4054 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4055
4056 tcg_gen_shri_vec(vece, t, a, sh - 1);
4057 tcg_gen_dupi_vec(vece, ones, 1);
4058 tcg_gen_and_vec(vece, t, t, ones);
4059 tcg_gen_sari_vec(vece, d, a, sh);
4060 tcg_gen_add_vec(vece, d, d, t);
4061
4062 tcg_temp_free_vec(t);
4063 tcg_temp_free_vec(ones);
4064 }
4065
4066 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4067 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4068 {
4069 static const TCGOpcode vecop_list[] = {
4070 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4071 };
4072 static const GVecGen2i ops[4] = {
4073 { .fni8 = gen_srshr8_i64,
4074 .fniv = gen_srshr_vec,
4075 .fno = gen_helper_gvec_srshr_b,
4076 .opt_opc = vecop_list,
4077 .vece = MO_8 },
4078 { .fni8 = gen_srshr16_i64,
4079 .fniv = gen_srshr_vec,
4080 .fno = gen_helper_gvec_srshr_h,
4081 .opt_opc = vecop_list,
4082 .vece = MO_16 },
4083 { .fni4 = gen_srshr32_i32,
4084 .fniv = gen_srshr_vec,
4085 .fno = gen_helper_gvec_srshr_s,
4086 .opt_opc = vecop_list,
4087 .vece = MO_32 },
4088 { .fni8 = gen_srshr64_i64,
4089 .fniv = gen_srshr_vec,
4090 .fno = gen_helper_gvec_srshr_d,
4091 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4092 .opt_opc = vecop_list,
4093 .vece = MO_64 },
4094 };
4095
4096 /* tszimm encoding produces immediates in the range [1..esize] */
4097 tcg_debug_assert(shift > 0);
4098 tcg_debug_assert(shift <= (8 << vece));
4099
4100 if (shift == (8 << vece)) {
4101 /*
4102 * Shifts larger than the element size are architecturally valid.
4103 * Signed results in all sign bits. With rounding, this produces
4104 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4105 * I.e. always zero.
4106 */
4107 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
4108 } else {
4109 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4110 }
4111 }
4112
4113 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4114 {
4115 TCGv_i64 t = tcg_temp_new_i64();
4116
4117 gen_srshr8_i64(t, a, sh);
4118 tcg_gen_vec_add8_i64(d, d, t);
4119 tcg_temp_free_i64(t);
4120 }
4121
4122 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4123 {
4124 TCGv_i64 t = tcg_temp_new_i64();
4125
4126 gen_srshr16_i64(t, a, sh);
4127 tcg_gen_vec_add16_i64(d, d, t);
4128 tcg_temp_free_i64(t);
4129 }
4130
4131 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4132 {
4133 TCGv_i32 t = tcg_temp_new_i32();
4134
4135 gen_srshr32_i32(t, a, sh);
4136 tcg_gen_add_i32(d, d, t);
4137 tcg_temp_free_i32(t);
4138 }
4139
4140 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4141 {
4142 TCGv_i64 t = tcg_temp_new_i64();
4143
4144 gen_srshr64_i64(t, a, sh);
4145 tcg_gen_add_i64(d, d, t);
4146 tcg_temp_free_i64(t);
4147 }
4148
4149 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4150 {
4151 TCGv_vec t = tcg_temp_new_vec_matching(d);
4152
4153 gen_srshr_vec(vece, t, a, sh);
4154 tcg_gen_add_vec(vece, d, d, t);
4155 tcg_temp_free_vec(t);
4156 }
4157
4158 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4159 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4160 {
4161 static const TCGOpcode vecop_list[] = {
4162 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4163 };
4164 static const GVecGen2i ops[4] = {
4165 { .fni8 = gen_srsra8_i64,
4166 .fniv = gen_srsra_vec,
4167 .fno = gen_helper_gvec_srsra_b,
4168 .opt_opc = vecop_list,
4169 .load_dest = true,
4170 .vece = MO_8 },
4171 { .fni8 = gen_srsra16_i64,
4172 .fniv = gen_srsra_vec,
4173 .fno = gen_helper_gvec_srsra_h,
4174 .opt_opc = vecop_list,
4175 .load_dest = true,
4176 .vece = MO_16 },
4177 { .fni4 = gen_srsra32_i32,
4178 .fniv = gen_srsra_vec,
4179 .fno = gen_helper_gvec_srsra_s,
4180 .opt_opc = vecop_list,
4181 .load_dest = true,
4182 .vece = MO_32 },
4183 { .fni8 = gen_srsra64_i64,
4184 .fniv = gen_srsra_vec,
4185 .fno = gen_helper_gvec_srsra_d,
4186 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4187 .opt_opc = vecop_list,
4188 .load_dest = true,
4189 .vece = MO_64 },
4190 };
4191
4192 /* tszimm encoding produces immediates in the range [1..esize] */
4193 tcg_debug_assert(shift > 0);
4194 tcg_debug_assert(shift <= (8 << vece));
4195
4196 /*
4197 * Shifts larger than the element size are architecturally valid.
4198 * Signed results in all sign bits. With rounding, this produces
4199 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4200 * I.e. always zero. With accumulation, this leaves D unchanged.
4201 */
4202 if (shift == (8 << vece)) {
4203 /* Nop, but we do need to clear the tail. */
4204 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4205 } else {
4206 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4207 }
4208 }
4209
4210 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4211 {
4212 TCGv_i64 t = tcg_temp_new_i64();
4213
4214 tcg_gen_shri_i64(t, a, sh - 1);
4215 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4216 tcg_gen_vec_shr8i_i64(d, a, sh);
4217 tcg_gen_vec_add8_i64(d, d, t);
4218 tcg_temp_free_i64(t);
4219 }
4220
4221 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4222 {
4223 TCGv_i64 t = tcg_temp_new_i64();
4224
4225 tcg_gen_shri_i64(t, a, sh - 1);
4226 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4227 tcg_gen_vec_shr16i_i64(d, a, sh);
4228 tcg_gen_vec_add16_i64(d, d, t);
4229 tcg_temp_free_i64(t);
4230 }
4231
4232 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4233 {
4234 TCGv_i32 t = tcg_temp_new_i32();
4235
4236 tcg_gen_extract_i32(t, a, sh - 1, 1);
4237 tcg_gen_shri_i32(d, a, sh);
4238 tcg_gen_add_i32(d, d, t);
4239 tcg_temp_free_i32(t);
4240 }
4241
4242 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4243 {
4244 TCGv_i64 t = tcg_temp_new_i64();
4245
4246 tcg_gen_extract_i64(t, a, sh - 1, 1);
4247 tcg_gen_shri_i64(d, a, sh);
4248 tcg_gen_add_i64(d, d, t);
4249 tcg_temp_free_i64(t);
4250 }
4251
4252 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4253 {
4254 TCGv_vec t = tcg_temp_new_vec_matching(d);
4255 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4256
4257 tcg_gen_shri_vec(vece, t, a, shift - 1);
4258 tcg_gen_dupi_vec(vece, ones, 1);
4259 tcg_gen_and_vec(vece, t, t, ones);
4260 tcg_gen_shri_vec(vece, d, a, shift);
4261 tcg_gen_add_vec(vece, d, d, t);
4262
4263 tcg_temp_free_vec(t);
4264 tcg_temp_free_vec(ones);
4265 }
4266
4267 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4268 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4269 {
4270 static const TCGOpcode vecop_list[] = {
4271 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4272 };
4273 static const GVecGen2i ops[4] = {
4274 { .fni8 = gen_urshr8_i64,
4275 .fniv = gen_urshr_vec,
4276 .fno = gen_helper_gvec_urshr_b,
4277 .opt_opc = vecop_list,
4278 .vece = MO_8 },
4279 { .fni8 = gen_urshr16_i64,
4280 .fniv = gen_urshr_vec,
4281 .fno = gen_helper_gvec_urshr_h,
4282 .opt_opc = vecop_list,
4283 .vece = MO_16 },
4284 { .fni4 = gen_urshr32_i32,
4285 .fniv = gen_urshr_vec,
4286 .fno = gen_helper_gvec_urshr_s,
4287 .opt_opc = vecop_list,
4288 .vece = MO_32 },
4289 { .fni8 = gen_urshr64_i64,
4290 .fniv = gen_urshr_vec,
4291 .fno = gen_helper_gvec_urshr_d,
4292 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4293 .opt_opc = vecop_list,
4294 .vece = MO_64 },
4295 };
4296
4297 /* tszimm encoding produces immediates in the range [1..esize] */
4298 tcg_debug_assert(shift > 0);
4299 tcg_debug_assert(shift <= (8 << vece));
4300
4301 if (shift == (8 << vece)) {
4302 /*
4303 * Shifts larger than the element size are architecturally valid.
4304 * Unsigned results in zero. With rounding, this produces a
4305 * copy of the most significant bit.
4306 */
4307 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4308 } else {
4309 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4310 }
4311 }
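
/*
 * Scalar illustration of the shift == esize case above for an 8-bit lane
 * (hypothetical name, illustrative only): with rounding, an unsigned right
 * shift by the full element size reduces to a copy of the most significant
 * bit, hence the plain gvec shift by esize - 1.
 */
static inline uint8_t urshr8_by8_ref(uint8_t x)
{
    return (uint8_t)(((uint32_t)x + 128u) >> 8);    /* == x >> 7 */
}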
4312
4313 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4314 {
4315 TCGv_i64 t = tcg_temp_new_i64();
4316
4317 if (sh == 8) {
4318 tcg_gen_vec_shr8i_i64(t, a, 7);
4319 } else {
4320 gen_urshr8_i64(t, a, sh);
4321 }
4322 tcg_gen_vec_add8_i64(d, d, t);
4323 tcg_temp_free_i64(t);
4324 }
4325
4326 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4327 {
4328 TCGv_i64 t = tcg_temp_new_i64();
4329
4330 if (sh == 16) {
4331 tcg_gen_vec_shr16i_i64(t, a, 15);
4332 } else {
4333 gen_urshr16_i64(t, a, sh);
4334 }
4335 tcg_gen_vec_add16_i64(d, d, t);
4336 tcg_temp_free_i64(t);
4337 }
4338
4339 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4340 {
4341 TCGv_i32 t = tcg_temp_new_i32();
4342
4343 if (sh == 32) {
4344 tcg_gen_shri_i32(t, a, 31);
4345 } else {
4346 gen_urshr32_i32(t, a, sh);
4347 }
4348 tcg_gen_add_i32(d, d, t);
4349 tcg_temp_free_i32(t);
4350 }
4351
4352 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4353 {
4354 TCGv_i64 t = tcg_temp_new_i64();
4355
4356 if (sh == 64) {
4357 tcg_gen_shri_i64(t, a, 63);
4358 } else {
4359 gen_urshr64_i64(t, a, sh);
4360 }
4361 tcg_gen_add_i64(d, d, t);
4362 tcg_temp_free_i64(t);
4363 }
4364
4365 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4366 {
4367 TCGv_vec t = tcg_temp_new_vec_matching(d);
4368
4369 if (sh == (8 << vece)) {
4370 tcg_gen_shri_vec(vece, t, a, sh - 1);
4371 } else {
4372 gen_urshr_vec(vece, t, a, sh);
4373 }
4374 tcg_gen_add_vec(vece, d, d, t);
4375 tcg_temp_free_vec(t);
4376 }
4377
4378 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4379 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4380 {
4381 static const TCGOpcode vecop_list[] = {
4382 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4383 };
4384 static const GVecGen2i ops[4] = {
4385 { .fni8 = gen_ursra8_i64,
4386 .fniv = gen_ursra_vec,
4387 .fno = gen_helper_gvec_ursra_b,
4388 .opt_opc = vecop_list,
4389 .load_dest = true,
4390 .vece = MO_8 },
4391 { .fni8 = gen_ursra16_i64,
4392 .fniv = gen_ursra_vec,
4393 .fno = gen_helper_gvec_ursra_h,
4394 .opt_opc = vecop_list,
4395 .load_dest = true,
4396 .vece = MO_16 },
4397 { .fni4 = gen_ursra32_i32,
4398 .fniv = gen_ursra_vec,
4399 .fno = gen_helper_gvec_ursra_s,
4400 .opt_opc = vecop_list,
4401 .load_dest = true,
4402 .vece = MO_32 },
4403 { .fni8 = gen_ursra64_i64,
4404 .fniv = gen_ursra_vec,
4405 .fno = gen_helper_gvec_ursra_d,
4406 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4407 .opt_opc = vecop_list,
4408 .load_dest = true,
4409 .vece = MO_64 },
4410 };
4411
4412 /* tszimm encoding produces immediates in the range [1..esize] */
4413 tcg_debug_assert(shift > 0);
4414 tcg_debug_assert(shift <= (8 << vece));
4415
4416 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4417 }
4418
4419 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4420 {
4421 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4422 TCGv_i64 t = tcg_temp_new_i64();
4423
4424 tcg_gen_shri_i64(t, a, shift);
4425 tcg_gen_andi_i64(t, t, mask);
4426 tcg_gen_andi_i64(d, d, ~mask);
4427 tcg_gen_or_i64(d, d, t);
4428 tcg_temp_free_i64(t);
4429 }
4430
4431 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4432 {
4433 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4434 TCGv_i64 t = tcg_temp_new_i64();
4435
4436 tcg_gen_shri_i64(t, a, shift);
4437 tcg_gen_andi_i64(t, t, mask);
4438 tcg_gen_andi_i64(d, d, ~mask);
4439 tcg_gen_or_i64(d, d, t);
4440 tcg_temp_free_i64(t);
4441 }
4442
4443 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4444 {
4445 tcg_gen_shri_i32(a, a, shift);
4446 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4447 }
4448
4449 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4450 {
4451 tcg_gen_shri_i64(a, a, shift);
4452 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4453 }
4454
4455 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4456 {
4457 TCGv_vec t = tcg_temp_new_vec_matching(d);
4458 TCGv_vec m = tcg_temp_new_vec_matching(d);
4459
4460 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4461 tcg_gen_shri_vec(vece, t, a, sh);
4462 tcg_gen_and_vec(vece, d, d, m);
4463 tcg_gen_or_vec(vece, d, d, t);
4464
4465 tcg_temp_free_vec(t);
4466 tcg_temp_free_vec(m);
4467 }
4468
4469 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4470 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4471 {
4472 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4473 const GVecGen2i ops[4] = {
4474 { .fni8 = gen_shr8_ins_i64,
4475 .fniv = gen_shr_ins_vec,
4476 .fno = gen_helper_gvec_sri_b,
4477 .load_dest = true,
4478 .opt_opc = vecop_list,
4479 .vece = MO_8 },
4480 { .fni8 = gen_shr16_ins_i64,
4481 .fniv = gen_shr_ins_vec,
4482 .fno = gen_helper_gvec_sri_h,
4483 .load_dest = true,
4484 .opt_opc = vecop_list,
4485 .vece = MO_16 },
4486 { .fni4 = gen_shr32_ins_i32,
4487 .fniv = gen_shr_ins_vec,
4488 .fno = gen_helper_gvec_sri_s,
4489 .load_dest = true,
4490 .opt_opc = vecop_list,
4491 .vece = MO_32 },
4492 { .fni8 = gen_shr64_ins_i64,
4493 .fniv = gen_shr_ins_vec,
4494 .fno = gen_helper_gvec_sri_d,
4495 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4496 .load_dest = true,
4497 .opt_opc = vecop_list,
4498 .vece = MO_64 },
4499 };
4500
4501 /* tszimm encoding produces immediates in the range [1..esize]. */
4502 tcg_debug_assert(shift > 0);
4503 tcg_debug_assert(shift <= (8 << vece));
4504
4505 /* Shift of esize leaves destination unchanged. */
4506 if (shift < (8 << vece)) {
4507 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4508 } else {
4509 /* Nop, but we do need to clear the tail. */
4510 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4511 }
4512 }
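
/*
 * Scalar sketch of the insert performed above for an 8-bit lane
 * (hypothetical name, illustrative only): the top sh bits of the
 * destination survive, so a shift by the full element size leaves the
 * destination untouched, which is the nop case handled just above.
 */
static inline uint8_t sri8_ref(uint8_t d, uint8_t x, int sh) /* 1 <= sh <= 8 */
{
    uint8_t keep = (uint8_t)(0xff00u >> sh);        /* top sh bits of d */
    return (uint8_t)((d & keep) | (x >> sh));
}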
4513
4514 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4515 {
4516 uint64_t mask = dup_const(MO_8, 0xff << shift);
4517 TCGv_i64 t = tcg_temp_new_i64();
4518
4519 tcg_gen_shli_i64(t, a, shift);
4520 tcg_gen_andi_i64(t, t, mask);
4521 tcg_gen_andi_i64(d, d, ~mask);
4522 tcg_gen_or_i64(d, d, t);
4523 tcg_temp_free_i64(t);
4524 }
4525
4526 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4527 {
4528 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4529 TCGv_i64 t = tcg_temp_new_i64();
4530
4531 tcg_gen_shli_i64(t, a, shift);
4532 tcg_gen_andi_i64(t, t, mask);
4533 tcg_gen_andi_i64(d, d, ~mask);
4534 tcg_gen_or_i64(d, d, t);
4535 tcg_temp_free_i64(t);
4536 }
4537
4538 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4539 {
4540 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4541 }
4542
4543 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4544 {
4545 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4546 }
4547
4548 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4549 {
4550 TCGv_vec t = tcg_temp_new_vec_matching(d);
4551 TCGv_vec m = tcg_temp_new_vec_matching(d);
4552
4553 tcg_gen_shli_vec(vece, t, a, sh);
4554 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4555 tcg_gen_and_vec(vece, d, d, m);
4556 tcg_gen_or_vec(vece, d, d, t);
4557
4558 tcg_temp_free_vec(t);
4559 tcg_temp_free_vec(m);
4560 }
4561
4562 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4563 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4564 {
4565 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4566 const GVecGen2i ops[4] = {
4567 { .fni8 = gen_shl8_ins_i64,
4568 .fniv = gen_shl_ins_vec,
4569 .fno = gen_helper_gvec_sli_b,
4570 .load_dest = true,
4571 .opt_opc = vecop_list,
4572 .vece = MO_8 },
4573 { .fni8 = gen_shl16_ins_i64,
4574 .fniv = gen_shl_ins_vec,
4575 .fno = gen_helper_gvec_sli_h,
4576 .load_dest = true,
4577 .opt_opc = vecop_list,
4578 .vece = MO_16 },
4579 { .fni4 = gen_shl32_ins_i32,
4580 .fniv = gen_shl_ins_vec,
4581 .fno = gen_helper_gvec_sli_s,
4582 .load_dest = true,
4583 .opt_opc = vecop_list,
4584 .vece = MO_32 },
4585 { .fni8 = gen_shl64_ins_i64,
4586 .fniv = gen_shl_ins_vec,
4587 .fno = gen_helper_gvec_sli_d,
4588 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4589 .load_dest = true,
4590 .opt_opc = vecop_list,
4591 .vece = MO_64 },
4592 };
4593
4594 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4595 tcg_debug_assert(shift >= 0);
4596 tcg_debug_assert(shift < (8 << vece));
4597
4598 if (shift == 0) {
4599 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4600 } else {
4601 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4602 }
4603 }
4604
4605 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4606 {
4607 gen_helper_neon_mul_u8(a, a, b);
4608 gen_helper_neon_add_u8(d, d, a);
4609 }
4610
4611 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4612 {
4613 gen_helper_neon_mul_u8(a, a, b);
4614 gen_helper_neon_sub_u8(d, d, a);
4615 }
4616
4617 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4618 {
4619 gen_helper_neon_mul_u16(a, a, b);
4620 gen_helper_neon_add_u16(d, d, a);
4621 }
4622
4623 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4624 {
4625 gen_helper_neon_mul_u16(a, a, b);
4626 gen_helper_neon_sub_u16(d, d, a);
4627 }
4628
4629 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4630 {
4631 tcg_gen_mul_i32(a, a, b);
4632 tcg_gen_add_i32(d, d, a);
4633 }
4634
4635 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4636 {
4637 tcg_gen_mul_i32(a, a, b);
4638 tcg_gen_sub_i32(d, d, a);
4639 }
4640
4641 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4642 {
4643 tcg_gen_mul_i64(a, a, b);
4644 tcg_gen_add_i64(d, d, a);
4645 }
4646
4647 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4648 {
4649 tcg_gen_mul_i64(a, a, b);
4650 tcg_gen_sub_i64(d, d, a);
4651 }
4652
4653 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4654 {
4655 tcg_gen_mul_vec(vece, a, a, b);
4656 tcg_gen_add_vec(vece, d, d, a);
4657 }
4658
4659 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4660 {
4661 tcg_gen_mul_vec(vece, a, a, b);
4662 tcg_gen_sub_vec(vece, d, d, a);
4663 }
4664
4665 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4666 * these tables are shared with AArch64 which does support them.
4667 */
4668
4669 static const TCGOpcode vecop_list_mla[] = {
4670 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4671 };
4672
4673 static const TCGOpcode vecop_list_mls[] = {
4674 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4675 };
4676
4677 const GVecGen3 mla_op[4] = {
4678 { .fni4 = gen_mla8_i32,
4679 .fniv = gen_mla_vec,
4680 .load_dest = true,
4681 .opt_opc = vecop_list_mla,
4682 .vece = MO_8 },
4683 { .fni4 = gen_mla16_i32,
4684 .fniv = gen_mla_vec,
4685 .load_dest = true,
4686 .opt_opc = vecop_list_mla,
4687 .vece = MO_16 },
4688 { .fni4 = gen_mla32_i32,
4689 .fniv = gen_mla_vec,
4690 .load_dest = true,
4691 .opt_opc = vecop_list_mla,
4692 .vece = MO_32 },
4693 { .fni8 = gen_mla64_i64,
4694 .fniv = gen_mla_vec,
4695 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4696 .load_dest = true,
4697 .opt_opc = vecop_list_mla,
4698 .vece = MO_64 },
4699 };
4700
4701 const GVecGen3 mls_op[4] = {
4702 { .fni4 = gen_mls8_i32,
4703 .fniv = gen_mls_vec,
4704 .load_dest = true,
4705 .opt_opc = vecop_list_mls,
4706 .vece = MO_8 },
4707 { .fni4 = gen_mls16_i32,
4708 .fniv = gen_mls_vec,
4709 .load_dest = true,
4710 .opt_opc = vecop_list_mls,
4711 .vece = MO_16 },
4712 { .fni4 = gen_mls32_i32,
4713 .fniv = gen_mls_vec,
4714 .load_dest = true,
4715 .opt_opc = vecop_list_mls,
4716 .vece = MO_32 },
4717 { .fni8 = gen_mls64_i64,
4718 .fniv = gen_mls_vec,
4719 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4720 .load_dest = true,
4721 .opt_opc = vecop_list_mls,
4722 .vece = MO_64 },
4723 };
4724
4725 /* CMTST : test is "if ((X & Y) != 0)". */
4726 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4727 {
4728 tcg_gen_and_i32(d, a, b);
4729 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4730 tcg_gen_neg_i32(d, d);
4731 }
4732
4733 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4734 {
4735 tcg_gen_and_i64(d, a, b);
4736 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4737 tcg_gen_neg_i64(d, d);
4738 }
4739
4740 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4741 {
4742 tcg_gen_and_vec(vece, d, a, b);
4743 tcg_gen_dupi_vec(vece, a, 0);
4744 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4745 }
4746
4747 static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4748
4749 const GVecGen3 cmtst_op[4] = {
4750 { .fni4 = gen_helper_neon_tst_u8,
4751 .fniv = gen_cmtst_vec,
4752 .opt_opc = vecop_list_cmtst,
4753 .vece = MO_8 },
4754 { .fni4 = gen_helper_neon_tst_u16,
4755 .fniv = gen_cmtst_vec,
4756 .opt_opc = vecop_list_cmtst,
4757 .vece = MO_16 },
4758 { .fni4 = gen_cmtst_i32,
4759 .fniv = gen_cmtst_vec,
4760 .opt_opc = vecop_list_cmtst,
4761 .vece = MO_32 },
4762 { .fni8 = gen_cmtst_i64,
4763 .fniv = gen_cmtst_vec,
4764 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4765 .opt_opc = vecop_list_cmtst,
4766 .vece = MO_64 },
4767 };
4768
4769 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4770 {
4771 TCGv_i32 lval = tcg_temp_new_i32();
4772 TCGv_i32 rval = tcg_temp_new_i32();
4773 TCGv_i32 lsh = tcg_temp_new_i32();
4774 TCGv_i32 rsh = tcg_temp_new_i32();
4775 TCGv_i32 zero = tcg_const_i32(0);
4776 TCGv_i32 max = tcg_const_i32(32);
4777
4778 /*
4779 * Rely on the TCG guarantee that out of range shifts produce
4780 * unspecified results, not undefined behaviour (i.e. no trap).
4781 * Discard out-of-range results after the fact.
4782 */
4783 tcg_gen_ext8s_i32(lsh, shift);
4784 tcg_gen_neg_i32(rsh, lsh);
4785 tcg_gen_shl_i32(lval, src, lsh);
4786 tcg_gen_shr_i32(rval, src, rsh);
4787 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4788 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4789
4790 tcg_temp_free_i32(lval);
4791 tcg_temp_free_i32(rval);
4792 tcg_temp_free_i32(lsh);
4793 tcg_temp_free_i32(rsh);
4794 tcg_temp_free_i32(zero);
4795 tcg_temp_free_i32(max);
4796 }
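
/*
 * Scalar sketch of the USHL.32 semantics produced above (hypothetical
 * name, illustrative only): the count is the signed low byte of the shift
 * operand, positive counts shift left, negative counts shift right, and
 * any count that moves every bit out yields zero.  The TCG code computes
 * both candidate results and discards with movcond rather than branching.
 */
static inline uint32_t ushl32_ref(uint32_t x, int8_t sh)
{
    if (sh <= -32 || sh >= 32) {
        return 0;
    }
    return sh >= 0 ? x << sh : x >> -sh;
}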
4797
4798 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4799 {
4800 TCGv_i64 lval = tcg_temp_new_i64();
4801 TCGv_i64 rval = tcg_temp_new_i64();
4802 TCGv_i64 lsh = tcg_temp_new_i64();
4803 TCGv_i64 rsh = tcg_temp_new_i64();
4804 TCGv_i64 zero = tcg_const_i64(0);
4805 TCGv_i64 max = tcg_const_i64(64);
4806
4807 /*
4808 * Rely on the TCG guarantee that out of range shifts produce
4809 * unspecified results, not undefined behaviour (i.e. no trap).
4810 * Discard out-of-range results after the fact.
4811 */
4812 tcg_gen_ext8s_i64(lsh, shift);
4813 tcg_gen_neg_i64(rsh, lsh);
4814 tcg_gen_shl_i64(lval, src, lsh);
4815 tcg_gen_shr_i64(rval, src, rsh);
4816 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4817 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4818
4819 tcg_temp_free_i64(lval);
4820 tcg_temp_free_i64(rval);
4821 tcg_temp_free_i64(lsh);
4822 tcg_temp_free_i64(rsh);
4823 tcg_temp_free_i64(zero);
4824 tcg_temp_free_i64(max);
4825 }
4826
4827 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4828 TCGv_vec src, TCGv_vec shift)
4829 {
4830 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4831 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4832 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4833 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4834 TCGv_vec msk, max;
4835
4836 tcg_gen_neg_vec(vece, rsh, shift);
4837 if (vece == MO_8) {
4838 tcg_gen_mov_vec(lsh, shift);
4839 } else {
4840 msk = tcg_temp_new_vec_matching(dst);
4841 tcg_gen_dupi_vec(vece, msk, 0xff);
4842 tcg_gen_and_vec(vece, lsh, shift, msk);
4843 tcg_gen_and_vec(vece, rsh, rsh, msk);
4844 tcg_temp_free_vec(msk);
4845 }
4846
4847 /*
4848 * Rely on the TCG guarantee that out of range shifts produce
4849 * unspecified results, not undefined behaviour (i.e. no trap).
4850 * Discard out-of-range results after the fact.
4851 */
4852 tcg_gen_shlv_vec(vece, lval, src, lsh);
4853 tcg_gen_shrv_vec(vece, rval, src, rsh);
4854
4855 max = tcg_temp_new_vec_matching(dst);
4856 tcg_gen_dupi_vec(vece, max, 8 << vece);
4857
4858 /*
4859 * The choice of LT (signed) and GEU (unsigned) is biased toward
4860 * the instructions of the x86_64 host. For MO_8, the whole byte
4861 * is significant so we must use an unsigned compare; otherwise we
4862 * have already masked to a byte and so a signed compare works.
4863 * Other tcg hosts have a full set of comparisons and do not care.
4864 */
4865 if (vece == MO_8) {
4866 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4867 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4868 tcg_gen_andc_vec(vece, lval, lval, lsh);
4869 tcg_gen_andc_vec(vece, rval, rval, rsh);
4870 } else {
4871 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4872 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4873 tcg_gen_and_vec(vece, lval, lval, lsh);
4874 tcg_gen_and_vec(vece, rval, rval, rsh);
4875 }
4876 tcg_gen_or_vec(vece, dst, lval, rval);
4877
4878 tcg_temp_free_vec(max);
4879 tcg_temp_free_vec(lval);
4880 tcg_temp_free_vec(rval);
4881 tcg_temp_free_vec(lsh);
4882 tcg_temp_free_vec(rsh);
4883 }
4884
4885 static const TCGOpcode ushl_list[] = {
4886 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4887 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4888 };
4889
4890 const GVecGen3 ushl_op[4] = {
4891 { .fniv = gen_ushl_vec,
4892 .fno = gen_helper_gvec_ushl_b,
4893 .opt_opc = ushl_list,
4894 .vece = MO_8 },
4895 { .fniv = gen_ushl_vec,
4896 .fno = gen_helper_gvec_ushl_h,
4897 .opt_opc = ushl_list,
4898 .vece = MO_16 },
4899 { .fni4 = gen_ushl_i32,
4900 .fniv = gen_ushl_vec,
4901 .opt_opc = ushl_list,
4902 .vece = MO_32 },
4903 { .fni8 = gen_ushl_i64,
4904 .fniv = gen_ushl_vec,
4905 .opt_opc = ushl_list,
4906 .vece = MO_64 },
4907 };
4908
4909 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4910 {
4911 TCGv_i32 lval = tcg_temp_new_i32();
4912 TCGv_i32 rval = tcg_temp_new_i32();
4913 TCGv_i32 lsh = tcg_temp_new_i32();
4914 TCGv_i32 rsh = tcg_temp_new_i32();
4915 TCGv_i32 zero = tcg_const_i32(0);
4916 TCGv_i32 max = tcg_const_i32(31);
4917
4918 /*
4919 * Rely on the TCG guarantee that out of range shifts produce
4920 * unspecified results, not undefined behaviour (i.e. no trap).
4921 * Discard out-of-range results after the fact.
4922 */
4923 tcg_gen_ext8s_i32(lsh, shift);
4924 tcg_gen_neg_i32(rsh, lsh);
4925 tcg_gen_shl_i32(lval, src, lsh);
4926 tcg_gen_umin_i32(rsh, rsh, max);
4927 tcg_gen_sar_i32(rval, src, rsh);
4928 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4929 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4930
4931 tcg_temp_free_i32(lval);
4932 tcg_temp_free_i32(rval);
4933 tcg_temp_free_i32(lsh);
4934 tcg_temp_free_i32(rsh);
4935 tcg_temp_free_i32(zero);
4936 tcg_temp_free_i32(max);
4937 }
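
/*
 * Scalar sketch of the SSHL.32 semantics produced above (hypothetical
 * name, illustrative only; assumes arithmetic right shift of negative
 * values): a right shift by 32 or more must still return the sign,
 * which is why the generated code clamps the count with umin to 31
 * instead of discarding the result as the unsigned case does.
 */
static inline int32_t sshl32_ref(int32_t x, int8_t sh)
{
    if (sh >= 32) {
        return 0;                            /* every bit shifted out */
    }
    if (sh <= -32) {
        return x < 0 ? -1 : 0;               /* saturates at the sign bits */
    }
    if (sh >= 0) {
        return (int32_t)((uint32_t)x << sh); /* avoid signed-overflow UB */
    }
    return x >> -sh;
}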
4938
4939 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4940 {
4941 TCGv_i64 lval = tcg_temp_new_i64();
4942 TCGv_i64 rval = tcg_temp_new_i64();
4943 TCGv_i64 lsh = tcg_temp_new_i64();
4944 TCGv_i64 rsh = tcg_temp_new_i64();
4945 TCGv_i64 zero = tcg_const_i64(0);
4946 TCGv_i64 max = tcg_const_i64(63);
4947
4948 /*
4949 * Rely on the TCG guarantee that out of range shifts produce
4950 * unspecified results, not undefined behaviour (i.e. no trap).
4951 * Discard out-of-range results after the fact.
4952 */
4953 tcg_gen_ext8s_i64(lsh, shift);
4954 tcg_gen_neg_i64(rsh, lsh);
4955 tcg_gen_shl_i64(lval, src, lsh);
4956 tcg_gen_umin_i64(rsh, rsh, max);
4957 tcg_gen_sar_i64(rval, src, rsh);
4958 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4959 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4960
4961 tcg_temp_free_i64(lval);
4962 tcg_temp_free_i64(rval);
4963 tcg_temp_free_i64(lsh);
4964 tcg_temp_free_i64(rsh);
4965 tcg_temp_free_i64(zero);
4966 tcg_temp_free_i64(max);
4967 }
4968
4969 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4970 TCGv_vec src, TCGv_vec shift)
4971 {
4972 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4973 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4974 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4975 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4976 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4977
4978 /*
4979 * Rely on the TCG guarantee that out of range shifts produce
4980 * unspecified results, not undefined behaviour (i.e. no trap).
4981 * Discard out-of-range results after the fact.
4982 */
4983 tcg_gen_neg_vec(vece, rsh, shift);
4984 if (vece == MO_8) {
4985 tcg_gen_mov_vec(lsh, shift);
4986 } else {
4987 tcg_gen_dupi_vec(vece, tmp, 0xff);
4988 tcg_gen_and_vec(vece, lsh, shift, tmp);
4989 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4990 }
4991
4992 /* Bound rsh so that an out-of-range right shift yields all sign bits. */
4993 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4994 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4995 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4996
4997 tcg_gen_shlv_vec(vece, lval, src, lsh);
4998 tcg_gen_sarv_vec(vece, rval, src, rsh);
4999
5000 /* Select in-bound left shift. */
5001 tcg_gen_andc_vec(vece, lval, lval, tmp);
5002
5003 /* Select between left and right shift. */
5004 if (vece == MO_8) {
5005 tcg_gen_dupi_vec(vece, tmp, 0);
5006 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
5007 } else {
5008 tcg_gen_dupi_vec(vece, tmp, 0x80);
5009 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
5010 }
5011
5012 tcg_temp_free_vec(lval);
5013 tcg_temp_free_vec(rval);
5014 tcg_temp_free_vec(lsh);
5015 tcg_temp_free_vec(rsh);
5016 tcg_temp_free_vec(tmp);
5017 }
5018
5019 static const TCGOpcode sshl_list[] = {
5020 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
5021 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
5022 };
5023
5024 const GVecGen3 sshl_op[4] = {
5025 { .fniv = gen_sshl_vec,
5026 .fno = gen_helper_gvec_sshl_b,
5027 .opt_opc = sshl_list,
5028 .vece = MO_8 },
5029 { .fniv = gen_sshl_vec,
5030 .fno = gen_helper_gvec_sshl_h,
5031 .opt_opc = sshl_list,
5032 .vece = MO_16 },
5033 { .fni4 = gen_sshl_i32,
5034 .fniv = gen_sshl_vec,
5035 .opt_opc = sshl_list,
5036 .vece = MO_32 },
5037 { .fni8 = gen_sshl_i64,
5038 .fniv = gen_sshl_vec,
5039 .opt_opc = sshl_list,
5040 .vece = MO_64 },
5041 };
5042
5043 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5044 TCGv_vec a, TCGv_vec b)
5045 {
5046 TCGv_vec x = tcg_temp_new_vec_matching(t);
5047 tcg_gen_add_vec(vece, x, a, b);
5048 tcg_gen_usadd_vec(vece, t, a, b);
5049 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5050 tcg_gen_or_vec(vece, sat, sat, x);
5051 tcg_temp_free_vec(x);
5052 }
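
/*
 * Scalar sketch of the saturation tracking above for an 8-bit lane
 * (hypothetical names, illustrative only): the wrapped and saturated sums
 * are compared, and any difference is OR-ed into the cumulative QC state,
 * mirroring the cmp_vec(NE) + or_vec sequence on the "sat" operand.
 */
static inline uint8_t uqadd8_ref(uint8_t a, uint8_t b, uint8_t *sat)
{
    unsigned wide = (unsigned)a + b;
    uint8_t wrapped = (uint8_t)wide;
    uint8_t saturated = wide > 0xff ? 0xff : wrapped;

    if (wrapped != saturated) {
        *sat |= 0xff;                        /* cmp_vec NE yields all ones */
    }
    return saturated;
}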
5053
5054 static const TCGOpcode vecop_list_uqadd[] = {
5055 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
5056 };
5057
5058 const GVecGen4 uqadd_op[4] = {
5059 { .fniv = gen_uqadd_vec,
5060 .fno = gen_helper_gvec_uqadd_b,
5061 .write_aofs = true,
5062 .opt_opc = vecop_list_uqadd,
5063 .vece = MO_8 },
5064 { .fniv = gen_uqadd_vec,
5065 .fno = gen_helper_gvec_uqadd_h,
5066 .write_aofs = true,
5067 .opt_opc = vecop_list_uqadd,
5068 .vece = MO_16 },
5069 { .fniv = gen_uqadd_vec,
5070 .fno = gen_helper_gvec_uqadd_s,
5071 .write_aofs = true,
5072 .opt_opc = vecop_list_uqadd,
5073 .vece = MO_32 },
5074 { .fniv = gen_uqadd_vec,
5075 .fno = gen_helper_gvec_uqadd_d,
5076 .write_aofs = true,
5077 .opt_opc = vecop_list_uqadd,
5078 .vece = MO_64 },
5079 };
5080
5081 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5082 TCGv_vec a, TCGv_vec b)
5083 {
5084 TCGv_vec x = tcg_temp_new_vec_matching(t);
5085 tcg_gen_add_vec(vece, x, a, b);
5086 tcg_gen_ssadd_vec(vece, t, a, b);
5087 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5088 tcg_gen_or_vec(vece, sat, sat, x);
5089 tcg_temp_free_vec(x);
5090 }
5091
5092 static const TCGOpcode vecop_list_sqadd[] = {
5093 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
5094 };
5095
5096 const GVecGen4 sqadd_op[4] = {
5097 { .fniv = gen_sqadd_vec,
5098 .fno = gen_helper_gvec_sqadd_b,
5099 .opt_opc = vecop_list_sqadd,
5100 .write_aofs = true,
5101 .vece = MO_8 },
5102 { .fniv = gen_sqadd_vec,
5103 .fno = gen_helper_gvec_sqadd_h,
5104 .opt_opc = vecop_list_sqadd,
5105 .write_aofs = true,
5106 .vece = MO_16 },
5107 { .fniv = gen_sqadd_vec,
5108 .fno = gen_helper_gvec_sqadd_s,
5109 .opt_opc = vecop_list_sqadd,
5110 .write_aofs = true,
5111 .vece = MO_32 },
5112 { .fniv = gen_sqadd_vec,
5113 .fno = gen_helper_gvec_sqadd_d,
5114 .opt_opc = vecop_list_sqadd,
5115 .write_aofs = true,
5116 .vece = MO_64 },
5117 };
5118
5119 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5120 TCGv_vec a, TCGv_vec b)
5121 {
5122 TCGv_vec x = tcg_temp_new_vec_matching(t);
5123 tcg_gen_sub_vec(vece, x, a, b);
5124 tcg_gen_ussub_vec(vece, t, a, b);
5125 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5126 tcg_gen_or_vec(vece, sat, sat, x);
5127 tcg_temp_free_vec(x);
5128 }
5129
5130 static const TCGOpcode vecop_list_uqsub[] = {
5131 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5132 };
5133
5134 const GVecGen4 uqsub_op[4] = {
5135 { .fniv = gen_uqsub_vec,
5136 .fno = gen_helper_gvec_uqsub_b,
5137 .opt_opc = vecop_list_uqsub,
5138 .write_aofs = true,
5139 .vece = MO_8 },
5140 { .fniv = gen_uqsub_vec,
5141 .fno = gen_helper_gvec_uqsub_h,
5142 .opt_opc = vecop_list_uqsub,
5143 .write_aofs = true,
5144 .vece = MO_16 },
5145 { .fniv = gen_uqsub_vec,
5146 .fno = gen_helper_gvec_uqsub_s,
5147 .opt_opc = vecop_list_uqsub,
5148 .write_aofs = true,
5149 .vece = MO_32 },
5150 { .fniv = gen_uqsub_vec,
5151 .fno = gen_helper_gvec_uqsub_d,
5152 .opt_opc = vecop_list_uqsub,
5153 .write_aofs = true,
5154 .vece = MO_64 },
5155 };
5156
5157 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5158 TCGv_vec a, TCGv_vec b)
5159 {
5160 TCGv_vec x = tcg_temp_new_vec_matching(t);
5161 tcg_gen_sub_vec(vece, x, a, b);
5162 tcg_gen_sssub_vec(vece, t, a, b);
5163 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5164 tcg_gen_or_vec(vece, sat, sat, x);
5165 tcg_temp_free_vec(x);
5166 }
5167
5168 static const TCGOpcode vecop_list_sqsub[] = {
5169 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5170 };
5171
5172 const GVecGen4 sqsub_op[4] = {
5173 { .fniv = gen_sqsub_vec,
5174 .fno = gen_helper_gvec_sqsub_b,
5175 .opt_opc = vecop_list_sqsub,
5176 .write_aofs = true,
5177 .vece = MO_8 },
5178 { .fniv = gen_sqsub_vec,
5179 .fno = gen_helper_gvec_sqsub_h,
5180 .opt_opc = vecop_list_sqsub,
5181 .write_aofs = true,
5182 .vece = MO_16 },
5183 { .fniv = gen_sqsub_vec,
5184 .fno = gen_helper_gvec_sqsub_s,
5185 .opt_opc = vecop_list_sqsub,
5186 .write_aofs = true,
5187 .vece = MO_32 },
5188 { .fniv = gen_sqsub_vec,
5189 .fno = gen_helper_gvec_sqsub_d,
5190 .opt_opc = vecop_list_sqsub,
5191 .write_aofs = true,
5192 .vece = MO_64 },
5193 };
5194
5195 /* Translate a NEON data processing instruction. Return nonzero if the
5196 instruction is invalid.
5197 We process data in a mixture of 32-bit and 64-bit chunks.
5198 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5199
5200 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5201 {
5202 int op;
5203 int q;
5204 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5205 int size;
5206 int shift;
5207 int pass;
5208 int count;
5209 int pairwise;
5210 int u;
5211 int vec_size;
5212 uint32_t imm;
5213 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5214 TCGv_ptr ptr1, ptr2, ptr3;
5215 TCGv_i64 tmp64;
5216
5217 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5218 return 1;
5219 }
5220
5221 /* FIXME: this access check should not take precedence over UNDEF
5222 * for invalid encodings; we will generate incorrect syndrome information
5223 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5224 */
5225 if (s->fp_excp_el) {
5226 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5227 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5228 return 0;
5229 }
5230
5231 if (!s->vfp_enabled)
5232 return 1;
5233 q = (insn & (1 << 6)) != 0;
5234 u = (insn >> 24) & 1;
5235 VFP_DREG_D(rd, insn);
5236 VFP_DREG_N(rn, insn);
5237 VFP_DREG_M(rm, insn);
5238 size = (insn >> 20) & 3;
5239 vec_size = q ? 16 : 8;
5240 rd_ofs = neon_reg_offset(rd, 0);
5241 rn_ofs = neon_reg_offset(rn, 0);
5242 rm_ofs = neon_reg_offset(rm, 0);
5243
5244 if ((insn & (1 << 23)) == 0) {
5245 /* Three register same length. */
5246 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5247 /* Catch invalid op and bad size combinations: UNDEF */
5248 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5249 return 1;
5250 }
5251 /* All insns of this form UNDEF for either this condition or the
5252 * superset of cases "Q==1"; we catch the latter later.
5253 */
5254 if (q && ((rd | rn | rm) & 1)) {
5255 return 1;
5256 }
5257 switch (op) {
5258 case NEON_3R_SHA:
5259 /* The SHA-1/SHA-256 3-register instructions require special
5260 * treatment here, as their size field is overloaded as an
5261 * op type selector, and they all consume their input in a
5262 * single pass.
5263 */
5264 if (!q) {
5265 return 1;
5266 }
5267 if (!u) { /* SHA-1 */
5268 if (!dc_isar_feature(aa32_sha1, s)) {
5269 return 1;
5270 }
5271 ptr1 = vfp_reg_ptr(true, rd);
5272 ptr2 = vfp_reg_ptr(true, rn);
5273 ptr3 = vfp_reg_ptr(true, rm);
5274 tmp4 = tcg_const_i32(size);
5275 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
5276 tcg_temp_free_i32(tmp4);
5277 } else { /* SHA-256 */
5278 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
5279 return 1;
5280 }
5281 ptr1 = vfp_reg_ptr(true, rd);
5282 ptr2 = vfp_reg_ptr(true, rn);
5283 ptr3 = vfp_reg_ptr(true, rm);
5284 switch (size) {
5285 case 0:
5286 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
5287 break;
5288 case 1:
5289 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
5290 break;
5291 case 2:
5292 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
5293 break;
5294 }
5295 }
5296 tcg_temp_free_ptr(ptr1);
5297 tcg_temp_free_ptr(ptr2);
5298 tcg_temp_free_ptr(ptr3);
5299 return 0;
5300
5301 case NEON_3R_VPADD_VQRDMLAH:
5302 if (!u) {
5303 break; /* VPADD */
5304 }
5305 /* VQRDMLAH */
5306 switch (size) {
5307 case 1:
5308 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
5309 q, rd, rn, rm);
5310 case 2:
5311 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
5312 q, rd, rn, rm);
5313 }
5314 return 1;
5315
5316 case NEON_3R_VFM_VQRDMLSH:
5317 if (!u) {
5318 /* VFM, VFMS */
5319 if (size == 1) {
5320 return 1;
5321 }
5322 break;
5323 }
5324 /* VQRDMLSH */
5325 switch (size) {
5326 case 1:
5327 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
5328 q, rd, rn, rm);
5329 case 2:
5330 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
5331 q, rd, rn, rm);
5332 }
5333 return 1;
5334
5335 case NEON_3R_VADD_VSUB:
5336 case NEON_3R_LOGIC:
5337 case NEON_3R_VMAX:
5338 case NEON_3R_VMIN:
5339 case NEON_3R_VTST_VCEQ:
5340 case NEON_3R_VCGT:
5341 case NEON_3R_VCGE:
5342 case NEON_3R_VQADD:
5343 case NEON_3R_VQSUB:
5344 case NEON_3R_VMUL:
5345 case NEON_3R_VML:
5346 case NEON_3R_VSHL:
5347 /* Already handled by decodetree */
5348 return 1;
5349 }
5350
5351 if (size == 3) {
5352 /* 64-bit element instructions. */
5353 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5354 neon_load_reg64(cpu_V0, rn + pass);
5355 neon_load_reg64(cpu_V1, rm + pass);
5356 switch (op) {
5357 case NEON_3R_VQSHL:
5358 if (u) {
5359 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5360 cpu_V1, cpu_V0);
5361 } else {
5362 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5363 cpu_V1, cpu_V0);
5364 }
5365 break;
5366 case NEON_3R_VRSHL:
5367 if (u) {
5368 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5369 } else {
5370 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5371 }
5372 break;
5373 case NEON_3R_VQRSHL:
5374 if (u) {
5375 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5376 cpu_V1, cpu_V0);
5377 } else {
5378 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5379 cpu_V1, cpu_V0);
5380 }
5381 break;
5382 default:
5383 abort();
5384 }
5385 neon_store_reg64(cpu_V0, rd + pass);
5386 }
5387 return 0;
5388 }
5389 pairwise = 0;
5390 switch (op) {
5391 case NEON_3R_VQSHL:
5392 case NEON_3R_VRSHL:
5393 case NEON_3R_VQRSHL:
5394 {
5395 int rtmp;
5396 /* Shift instruction operands are reversed. */
5397 rtmp = rn;
5398 rn = rm;
5399 rm = rtmp;
5400 }
5401 break;
5402 case NEON_3R_VPADD_VQRDMLAH:
5403 case NEON_3R_VPMAX:
5404 case NEON_3R_VPMIN:
5405 pairwise = 1;
5406 break;
5407 case NEON_3R_FLOAT_ARITH:
5408 pairwise = (u && size < 2); /* if VPADD (float) */
5409 break;
5410 case NEON_3R_FLOAT_MINMAX:
5411 pairwise = u; /* if VPMIN/VPMAX (float) */
5412 break;
5413 case NEON_3R_FLOAT_CMP:
5414 if (!u && size) {
5415 /* no encoding for U=0 C=1x */
5416 return 1;
5417 }
5418 break;
5419 case NEON_3R_FLOAT_ACMP:
5420 if (!u) {
5421 return 1;
5422 }
5423 break;
5424 case NEON_3R_FLOAT_MISC:
5425 /* VMAXNM/VMINNM in ARMv8 */
5426 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5427 return 1;
5428 }
5429 break;
5430 case NEON_3R_VFM_VQRDMLSH:
5431 if (!dc_isar_feature(aa32_simdfmac, s)) {
5432 return 1;
5433 }
5434 break;
5435 default:
5436 break;
5437 }
5438
5439 if (pairwise && q) {
5440 /* All the pairwise insns UNDEF if Q is set */
5441 return 1;
5442 }
5443
5444 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5445
5446 if (pairwise) {
5447 /* Pairwise. */
5448 if (pass < 1) {
5449 tmp = neon_load_reg(rn, 0);
5450 tmp2 = neon_load_reg(rn, 1);
5451 } else {
5452 tmp = neon_load_reg(rm, 0);
5453 tmp2 = neon_load_reg(rm, 1);
5454 }
5455 } else {
5456 /* Elementwise. */
5457 tmp = neon_load_reg(rn, pass);
5458 tmp2 = neon_load_reg(rm, pass);
5459 }
5460 switch (op) {
5461 case NEON_3R_VHADD:
5462 GEN_NEON_INTEGER_OP(hadd);
5463 break;
5464 case NEON_3R_VRHADD:
5465 GEN_NEON_INTEGER_OP(rhadd);
5466 break;
5467 case NEON_3R_VHSUB:
5468 GEN_NEON_INTEGER_OP(hsub);
5469 break;
5470 case NEON_3R_VQSHL:
5471 GEN_NEON_INTEGER_OP_ENV(qshl);
5472 break;
5473 case NEON_3R_VRSHL:
5474 GEN_NEON_INTEGER_OP(rshl);
5475 break;
5476 case NEON_3R_VQRSHL:
5477 GEN_NEON_INTEGER_OP_ENV(qrshl);
5478 break;
5479 case NEON_3R_VABD:
5480 GEN_NEON_INTEGER_OP(abd);
5481 break;
5482 case NEON_3R_VABA:
5483 GEN_NEON_INTEGER_OP(abd);
5484 tcg_temp_free_i32(tmp2);
5485 tmp2 = neon_load_reg(rd, pass);
5486 gen_neon_add(size, tmp, tmp2);
5487 break;
5488 case NEON_3R_VPMAX:
5489 GEN_NEON_INTEGER_OP(pmax);
5490 break;
5491 case NEON_3R_VPMIN:
5492 GEN_NEON_INTEGER_OP(pmin);
5493 break;
5494 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
5495 if (!u) { /* VQDMULH */
5496 switch (size) {
5497 case 1:
5498 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5499 break;
5500 case 2:
5501 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5502 break;
5503 default: abort();
5504 }
5505 } else { /* VQRDMULH */
5506 switch (size) {
5507 case 1:
5508 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5509 break;
5510 case 2:
5511 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5512 break;
5513 default: abort();
5514 }
5515 }
5516 break;
5517 case NEON_3R_VPADD_VQRDMLAH:
5518 switch (size) {
5519 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5520 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5521 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5522 default: abort();
5523 }
5524 break;
5525 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5526 {
5527 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5528 switch ((u << 2) | size) {
5529 case 0: /* VADD */
5530 case 4: /* VPADD */
5531 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5532 break;
5533 case 2: /* VSUB */
5534 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5535 break;
5536 case 6: /* VABD */
5537 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5538 break;
5539 default:
5540 abort();
5541 }
5542 tcg_temp_free_ptr(fpstatus);
5543 break;
5544 }
5545 case NEON_3R_FLOAT_MULTIPLY:
5546 {
5547 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5548 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5549 if (!u) {
5550 tcg_temp_free_i32(tmp2);
5551 tmp2 = neon_load_reg(rd, pass);
5552 if (size == 0) {
5553 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5554 } else {
5555 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5556 }
5557 }
5558 tcg_temp_free_ptr(fpstatus);
5559 break;
5560 }
5561 case NEON_3R_FLOAT_CMP:
5562 {
5563 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5564 if (!u) {
5565 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5566 } else {
5567 if (size == 0) {
5568 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5569 } else {
5570 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5571 }
5572 }
5573 tcg_temp_free_ptr(fpstatus);
5574 break;
5575 }
5576 case NEON_3R_FLOAT_ACMP:
5577 {
5578 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5579 if (size == 0) {
5580 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5581 } else {
5582 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5583 }
5584 tcg_temp_free_ptr(fpstatus);
5585 break;
5586 }
5587 case NEON_3R_FLOAT_MINMAX:
5588 {
5589 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5590 if (size == 0) {
5591 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5592 } else {
5593 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5594 }
5595 tcg_temp_free_ptr(fpstatus);
5596 break;
5597 }
5598 case NEON_3R_FLOAT_MISC:
5599 if (u) {
5600 /* VMAXNM/VMINNM */
5601 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5602 if (size == 0) {
5603 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5604 } else {
5605 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5606 }
5607 tcg_temp_free_ptr(fpstatus);
5608 } else {
5609 if (size == 0) {
5610 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5611 } else {
5612 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5613 }
5614 }
5615 break;
5616 case NEON_3R_VFM_VQRDMLSH:
5617 {
5618 /* VFMA, VFMS: fused multiply-add */
5619 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5620 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5621 if (size) {
5622 /* VFMS */
5623 gen_helper_vfp_negs(tmp, tmp);
5624 }
5625 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5626 tcg_temp_free_i32(tmp3);
5627 tcg_temp_free_ptr(fpstatus);
5628 break;
5629 }
5630 default:
5631 abort();
5632 }
5633 tcg_temp_free_i32(tmp2);
5634
5635 /* Save the result. For elementwise operations we can put it
5636 straight into the destination register. For pairwise operations
5637 we have to be careful to avoid clobbering the source operands. */
5638 if (pairwise && rd == rm) {
5639 neon_store_scratch(pass, tmp);
5640 } else {
5641 neon_store_reg(rd, pass, tmp);
5642 }
5643
5644 } /* for pass */
5645 if (pairwise && rd == rm) {
5646 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5647 tmp = neon_load_scratch(pass);
5648 neon_store_reg(rd, pass, tmp);
5649 }
5650 }
5651 /* End of 3 register same size operations. */
5652 } else if (insn & (1 << 4)) {
5653 if ((insn & 0x00380080) != 0) {
5654 /* Two registers and shift. */
5655 op = (insn >> 8) & 0xf;
5656 if (insn & (1 << 7)) {
5657 /* 64-bit shift. */
5658 if (op > 7) {
5659 return 1;
5660 }
5661 size = 3;
5662 } else {
5663 size = 2;
5664 while ((insn & (1 << (size + 19))) == 0)
5665 size--;
5666 }
5667 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5668 if (op < 8) {
5669 /* Shift by immediate:
5670 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5671 if (q && ((rd | rm) & 1)) {
5672 return 1;
5673 }
5674 if (!u && (op == 4 || op == 6)) {
5675 return 1;
5676 }
5677 /* Right shifts are encoded as N - shift, where N is the
5678 element size in bits. */
5679 if (op <= 4) {
5680 shift = shift - (1 << (size + 3));
5681 }
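
/*
 * Worked example: for 8-bit elements (size == 0) an immediate field of
 * 3 encodes a right shift by 8 - 3 = 5, so shift is now -5; the right
 * shift cases below negate it back to a positive amount.
 */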
5682
5683 switch (op) {
5684 case 0: /* VSHR */
5685 /* Right shift comes here negative. */
5686 shift = -shift;
5687 /* Shifts larger than the element size are architecturally
5688 * valid. Unsigned results in all zeros; signed results
5689 * in all sign bits.
5690 */
5691 if (!u) {
5692 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5693 MIN(shift, (8 << size) - 1),
5694 vec_size, vec_size);
5695 } else if (shift >= 8 << size) {
5696 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5697 vec_size, 0);
5698 } else {
5699 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5700 vec_size, vec_size);
5701 }
5702 return 0;
5703
5704 case 1: /* VSRA */
5705 /* Right shift comes here negative. */
5706 shift = -shift;
5707 if (u) {
5708 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5709 vec_size, vec_size);
5710 } else {
5711 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5712 vec_size, vec_size);
5713 }
5714 return 0;
5715
5716 case 2: /* VRSHR */
5717 /* Right shift comes here negative. */
5718 shift = -shift;
5719 if (u) {
5720 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5721 vec_size, vec_size);
5722 } else {
5723 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5724 vec_size, vec_size);
5725 }
5726 return 0;
5727
5728 case 3: /* VRSRA */
5729 /* Right shift comes here negative. */
5730 shift = -shift;
5731 if (u) {
5732 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5733 vec_size, vec_size);
5734 } else {
5735 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5736 vec_size, vec_size);
5737 }
5738 return 0;
5739
5740 case 4: /* VSRI */
5741 if (!u) {
5742 return 1;
5743 }
5744 /* Right shift comes here negative. */
5745 shift = -shift;
5746 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5747 vec_size, vec_size);
5748 return 0;
5749
5750 case 5: /* VSHL, VSLI */
5751 if (u) { /* VSLI */
5752 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5753 vec_size, vec_size);
5754 } else { /* VSHL */
5755 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5756 vec_size, vec_size);
5757 }
5758 return 0;
5759 }
5760
5761 if (size == 3) {
5762 count = q + 1;
5763 } else {
5764 count = q ? 4 : 2;
5765 }
5766
5767 /* To avoid excessive duplication of ops we implement shift
5768 * by immediate using the variable shift operations.
5769 */
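/*
 * dup_const() replicates the count into every lane of a 64-bit constant,
 * e.g. dup_const(MO_8, 5) == 0x0505050505050505, so each lane of the
 * variable-shift helpers below sees the same immediate.
 */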
5770 imm = dup_const(size, shift);
5771
5772 for (pass = 0; pass < count; pass++) {
5773 if (size == 3) {
5774 neon_load_reg64(cpu_V0, rm + pass);
5775 tcg_gen_movi_i64(cpu_V1, imm);
5776 switch (op) {
5777 case 6: /* VQSHLU */
5778 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5779 cpu_V0, cpu_V1);
5780 break;
5781 case 7: /* VQSHL */
5782 if (u) {
5783 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5784 cpu_V0, cpu_V1);
5785 } else {
5786 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5787 cpu_V0, cpu_V1);
5788 }
5789 break;
5790 default:
5791 g_assert_not_reached();
5792 }
5793 neon_store_reg64(cpu_V0, rd + pass);
5794 } else { /* size < 3 */
5795 /* Operands in T0 and T1. */
5796 tmp = neon_load_reg(rm, pass);
5797 tmp2 = tcg_temp_new_i32();
5798 tcg_gen_movi_i32(tmp2, imm);
5799 switch (op) {
5800 case 6: /* VQSHLU */
5801 switch (size) {
5802 case 0:
5803 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5804 tmp, tmp2);
5805 break;
5806 case 1:
5807 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5808 tmp, tmp2);
5809 break;
5810 case 2:
5811 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5812 tmp, tmp2);
5813 break;
5814 default:
5815 abort();
5816 }
5817 break;
5818 case 7: /* VQSHL */
5819 GEN_NEON_INTEGER_OP_ENV(qshl);
5820 break;
5821 default:
5822 g_assert_not_reached();
5823 }
5824 tcg_temp_free_i32(tmp2);
5825 neon_store_reg(rd, pass, tmp);
5826 }
5827 } /* for pass */
5828 } else if (op < 10) {
5829 /* Shift by immediate and narrow:
5830 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5831 int input_unsigned = (op == 8) ? !u : u;
5832 if (rm & 1) {
5833 return 1;
5834 }
5835 shift = shift - (1 << (size + 3));
5836 size++;
5837 if (size == 3) {
5838 tmp64 = tcg_const_i64(shift);
5839 neon_load_reg64(cpu_V0, rm);
5840 neon_load_reg64(cpu_V1, rm + 1);
5841 for (pass = 0; pass < 2; pass++) {
5842 TCGv_i64 in;
5843 if (pass == 0) {
5844 in = cpu_V0;
5845 } else {
5846 in = cpu_V1;
5847 }
5848 if (q) {
5849 if (input_unsigned) {
5850 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5851 } else {
5852 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5853 }
5854 } else {
5855 if (input_unsigned) {
5856 gen_ushl_i64(cpu_V0, in, tmp64);
5857 } else {
5858 gen_sshl_i64(cpu_V0, in, tmp64);
5859 }
5860 }
5861 tmp = tcg_temp_new_i32();
5862 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5863 neon_store_reg(rd, pass, tmp);
5864 } /* for pass */
5865 tcg_temp_free_i64(tmp64);
5866 } else {
5867 if (size == 1) {
5868 imm = (uint16_t)shift;
5869 imm |= imm << 16;
5870 } else {
5871 /* size == 2 */
5872 imm = (uint32_t)shift;
5873 }
5874 tmp2 = tcg_const_i32(imm);
5875 tmp4 = neon_load_reg(rm + 1, 0);
5876 tmp5 = neon_load_reg(rm + 1, 1);
5877 for (pass = 0; pass < 2; pass++) {
5878 if (pass == 0) {
5879 tmp = neon_load_reg(rm, 0);
5880 } else {
5881 tmp = tmp4;
5882 }
5883 gen_neon_shift_narrow(size, tmp, tmp2, q,
5884 input_unsigned);
5885 if (pass == 0) {
5886 tmp3 = neon_load_reg(rm, 1);
5887 } else {
5888 tmp3 = tmp5;
5889 }
5890 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5891 input_unsigned);
5892 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5893 tcg_temp_free_i32(tmp);
5894 tcg_temp_free_i32(tmp3);
5895 tmp = tcg_temp_new_i32();
5896 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5897 neon_store_reg(rd, pass, tmp);
5898 } /* for pass */
5899 tcg_temp_free_i32(tmp2);
5900 }
5901 } else if (op == 10) {
5902 /* VSHLL, VMOVL */
5903 if (q || (rd & 1)) {
5904 return 1;
5905 }
5906 tmp = neon_load_reg(rm, 0);
5907 tmp2 = neon_load_reg(rm, 1);
5908 for (pass = 0; pass < 2; pass++) {
5909 if (pass == 1)
5910 tmp = tmp2;
5911
5912 gen_neon_widen(cpu_V0, tmp, size, u);
5913
5914 if (shift != 0) {
5915 /* The shift is less than the width of the source
5916 type, so we can just shift the whole register. */
5917 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5918 /* Widen the result of the shift: we need to clear
5919 * the potential overflow bits resulting from
5920 * left bits of the narrow input appearing as
5921 * right bits of the left neighbour narrow
5922 * input. */
5923 if (size < 2 || !u) {
5924 uint64_t imm64;
5925 if (size == 0) {
5926 imm = (0xffu >> (8 - shift));
5927 imm |= imm << 16;
5928 } else if (size == 1) {
5929 imm = 0xffff >> (16 - shift);
5930 } else {
5931 /* size == 2 */
5932 imm = 0xffffffff >> (32 - shift);
5933 }
5934 if (size < 2) {
5935 imm64 = imm | (((uint64_t)imm) << 32);
5936 } else {
5937 imm64 = imm;
5938 }
5939 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5940 }
5941 }
5942 neon_store_reg64(cpu_V0, rd + pass);
5943 }
5944 } else if (op >= 14) {
5945 /* VCVT fixed-point. */
5946 TCGv_ptr fpst;
5947 TCGv_i32 shiftv;
5948 VFPGenFixPointFn *fn;
5949
5950 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5951 return 1;
5952 }
5953
5954 if (!(op & 1)) {
5955 if (u) {
5956 fn = gen_helper_vfp_ultos;
5957 } else {
5958 fn = gen_helper_vfp_sltos;
5959 }
5960 } else {
5961 if (u) {
5962 fn = gen_helper_vfp_touls_round_to_zero;
5963 } else {
5964 fn = gen_helper_vfp_tosls_round_to_zero;
5965 }
5966 }
5967
5968 /* We have already masked out the must-be-1 top bit of imm6,
5969 * hence this 32-shift where the ARM ARM has 64-imm6.
5970 */
5971 shift = 32 - shift;
5972 fpst = get_fpstatus_ptr(1);
5973 shiftv = tcg_const_i32(shift);
5974 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5975 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5976 fn(tmpf, tmpf, shiftv, fpst);
5977 neon_store_reg(rd, pass, tmpf);
5978 }
5979 tcg_temp_free_ptr(fpst);
5980 tcg_temp_free_i32(shiftv);
5981 } else {
5982 return 1;
5983 }
5984 } else { /* (insn & 0x00380080) == 0 */
5985 int invert, reg_ofs, vec_size;
5986
5987 if (q && (rd & 1)) {
5988 return 1;
5989 }
5990
5991 op = (insn >> 8) & 0xf;
5992 /* One register and immediate. */
5993 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5994 invert = (insn & (1 << 5)) != 0;
5995 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5996 * We choose to not special-case this and will behave as if a
5997 * valid constant encoding of 0 had been given.
5998 */
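/* Expand the 8-bit encoded constant according to op (the cmode field)
 * into the 32-bit value replicated across the vector below; op 14 with
 * the invert bit set instead expands each immediate bit to a byte.
 */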
5999 switch (op) {
6000 case 0: case 1:
6001 /* no-op */
6002 break;
6003 case 2: case 3:
6004 imm <<= 8;
6005 break;
6006 case 4: case 5:
6007 imm <<= 16;
6008 break;
6009 case 6: case 7:
6010 imm <<= 24;
6011 break;
6012 case 8: case 9:
6013 imm |= imm << 16;
6014 break;
6015 case 10: case 11:
6016 imm = (imm << 8) | (imm << 24);
6017 break;
6018 case 12:
6019 imm = (imm << 8) | 0xff;
6020 break;
6021 case 13:
6022 imm = (imm << 16) | 0xffff;
6023 break;
6024 case 14:
6025 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6026 if (invert) {
6027 imm = ~imm;
6028 }
6029 break;
6030 case 15:
6031 if (invert) {
6032 return 1;
6033 }
6034 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6035 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
6036 break;
6037 }
6038 if (invert) {
6039 imm = ~imm;
6040 }
6041
6042 reg_ofs = neon_reg_offset(rd, 0);
6043 vec_size = q ? 16 : 8;
6044
6045 if (op & 1 && op < 12) {
6046 if (invert) {
6047 /* The immediate value has already been inverted,
6048 * so BIC becomes AND.
6049 */
6050 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
6051 vec_size, vec_size);
6052 } else {
6053 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
6054 vec_size, vec_size);
6055 }
6056 } else {
6057 /* VMOV, VMVN. */
6058 if (op == 14 && invert) {
6059 TCGv_i64 t64 = tcg_temp_new_i64();
6060
6061 for (pass = 0; pass <= q; ++pass) {
6062 uint64_t val = 0;
6063 int n;
6064
6065 for (n = 0; n < 8; n++) {
6066 if (imm & (1 << (n + pass * 8))) {
6067 val |= 0xffull << (n * 8);
6068 }
6069 }
6070 tcg_gen_movi_i64(t64, val);
6071 neon_store_reg64(t64, rd + pass);
6072 }
6073 tcg_temp_free_i64(t64);
6074 } else {
6075 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
6076 vec_size, imm);
6077 }
6078 }
6079 }
6080 } else { /* (insn & 0x00800010 == 0x00800000) */
6081 if (size != 3) {
6082 op = (insn >> 8) & 0xf;
6083 if ((insn & (1 << 6)) == 0) {
6084 /* Three registers of different lengths. */
6085 int src1_wide;
6086 int src2_wide;
6087 int prewiden;
6088 /* undefreq: bit 0 : UNDEF if size == 0
6089 * bit 1 : UNDEF if size == 1
6090 * bit 2 : UNDEF if size == 2
6091 * bit 3 : UNDEF if U == 1
6092 * Note that [2:0] set implies 'always UNDEF'
6093 */
6094 int undefreq;
6095 /* prewiden, src1_wide, src2_wide, undefreq */
6096 static const int neon_3reg_wide[16][4] = {
6097 {1, 0, 0, 0}, /* VADDL */
6098 {1, 1, 0, 0}, /* VADDW */
6099 {1, 0, 0, 0}, /* VSUBL */
6100 {1, 1, 0, 0}, /* VSUBW */
6101 {0, 1, 1, 0}, /* VADDHN */
6102 {0, 0, 0, 0}, /* VABAL */
6103 {0, 1, 1, 0}, /* VSUBHN */
6104 {0, 0, 0, 0}, /* VABDL */
6105 {0, 0, 0, 0}, /* VMLAL */
6106 {0, 0, 0, 9}, /* VQDMLAL */
6107 {0, 0, 0, 0}, /* VMLSL */
6108 {0, 0, 0, 9}, /* VQDMLSL */
6109 {0, 0, 0, 0}, /* Integer VMULL */
6110 {0, 0, 0, 9}, /* VQDMULL */
6111 {0, 0, 0, 0xa}, /* Polynomial VMULL */
6112 {0, 0, 0, 7}, /* Reserved: always UNDEF */
6113 };
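/* e.g. undefreq 9 means UNDEF for size 0 or U == 1, 0xa means UNDEF
 * for size 1 or U == 1, and 7 means UNDEF for any size.
 */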
6114
6115 prewiden = neon_3reg_wide[op][0];
6116 src1_wide = neon_3reg_wide[op][1];
6117 src2_wide = neon_3reg_wide[op][2];
6118 undefreq = neon_3reg_wide[op][3];
6119
6120 if ((undefreq & (1 << size)) ||
6121 ((undefreq & 8) && u)) {
6122 return 1;
6123 }
6124 if ((src1_wide && (rn & 1)) ||
6125 (src2_wide && (rm & 1)) ||
6126 (!src2_wide && (rd & 1))) {
6127 return 1;
6128 }
6129
6130 /* Handle polynomial VMULL in a single pass. */
6131 if (op == 14) {
6132 if (size == 0) {
6133 /* VMULL.P8 */
6134 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6135 0, gen_helper_neon_pmull_h);
6136 } else {
6137 /* VMULL.P64 */
6138 if (!dc_isar_feature(aa32_pmull, s)) {
6139 return 1;
6140 }
6141 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6142 0, gen_helper_gvec_pmull_q);
6143 }
6144 return 0;
6145 }
6146
6147 /* Avoid overlapping operands. Wide source operands are
6148 always aligned so will never overlap with wide
6149 destinations in problematic ways. */
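/* If a narrow source overlaps the destination, its high half is saved
 * to scratch slot 2 now and re-loaded during pass 1, after pass 0 has
 * already written rd.
 */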
6150 if (rd == rm && !src2_wide) {
6151 tmp = neon_load_reg(rm, 1);
6152 neon_store_scratch(2, tmp);
6153 } else if (rd == rn && !src1_wide) {
6154 tmp = neon_load_reg(rn, 1);
6155 neon_store_scratch(2, tmp);
6156 }
6157 tmp3 = NULL;
6158 for (pass = 0; pass < 2; pass++) {
6159 if (src1_wide) {
6160 neon_load_reg64(cpu_V0, rn + pass);
6161 tmp = NULL;
6162 } else {
6163 if (pass == 1 && rd == rn) {
6164 tmp = neon_load_scratch(2);
6165 } else {
6166 tmp = neon_load_reg(rn, pass);
6167 }
6168 if (prewiden) {
6169 gen_neon_widen(cpu_V0, tmp, size, u);
6170 }
6171 }
6172 if (src2_wide) {
6173 neon_load_reg64(cpu_V1, rm + pass);
6174 tmp2 = NULL;
6175 } else {
6176 if (pass == 1 && rd == rm) {
6177 tmp2 = neon_load_scratch(2);
6178 } else {
6179 tmp2 = neon_load_reg(rm, pass);
6180 }
6181 if (prewiden) {
6182 gen_neon_widen(cpu_V1, tmp2, size, u);
6183 }
6184 }
6185 switch (op) {
6186 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6187 gen_neon_addl(size);
6188 break;
6189 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6190 gen_neon_subl(size);
6191 break;
6192 case 5: case 7: /* VABAL, VABDL */
6193 switch ((size << 1) | u) {
6194 case 0:
6195 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6196 break;
6197 case 1:
6198 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6199 break;
6200 case 2:
6201 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6202 break;
6203 case 3:
6204 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6205 break;
6206 case 4:
6207 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6208 break;
6209 case 5:
6210 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6211 break;
6212 default: abort();
6213 }
6214 tcg_temp_free_i32(tmp2);
6215 tcg_temp_free_i32(tmp);
6216 break;
6217 case 8: case 9: case 10: case 11: case 12: case 13:
6218 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6219 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6220 break;
6221 default: /* 15 is RESERVED: caught earlier */
6222 abort();
6223 }
6224 if (op == 13) {
6225 /* VQDMULL */
6226 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6227 neon_store_reg64(cpu_V0, rd + pass);
6228 } else if (op == 5 || (op >= 8 && op <= 11)) {
6229 /* Accumulate. */
6230 neon_load_reg64(cpu_V1, rd + pass);
6231 switch (op) {
6232 case 10: /* VMLSL */
6233 gen_neon_negl(cpu_V0, size);
6234 /* Fall through */
6235 case 5: case 8: /* VABAL, VMLAL */
6236 gen_neon_addl(size);
6237 break;
6238 case 9: case 11: /* VQDMLAL, VQDMLSL */
6239 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6240 if (op == 11) {
6241 gen_neon_negl(cpu_V0, size);
6242 }
6243 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6244 break;
6245 default:
6246 abort();
6247 }
6248 neon_store_reg64(cpu_V0, rd + pass);
6249 } else if (op == 4 || op == 6) {
6250 /* Narrowing operation. */
6251 tmp = tcg_temp_new_i32();
6252 if (!u) {
6253 switch (size) {
6254 case 0:
6255 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6256 break;
6257 case 1:
6258 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6259 break;
6260 case 2:
6261 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6262 break;
6263 default: abort();
6264 }
6265 } else {
6266 switch (size) {
6267 case 0:
6268 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6269 break;
6270 case 1:
6271 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6272 break;
6273 case 2:
6274 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6275 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6276 break;
6277 default: abort();
6278 }
6279 }
6280 if (pass == 0) {
6281 tmp3 = tmp;
6282 } else {
6283 neon_store_reg(rd, 0, tmp3);
6284 neon_store_reg(rd, 1, tmp);
6285 }
6286 } else {
6287 /* Write back the result. */
6288 neon_store_reg64(cpu_V0, rd + pass);
6289 }
6290 }
6291 } else {
6292 /* Two registers and a scalar. NB that for ops of this form
6293 * the ARM ARM labels bit 24 as Q, but it is in our variable
6294 * 'u', not 'q'.
6295 */
6296 if (size == 0) {
6297 return 1;
6298 }
6299 switch (op) {
6300 case 1: /* Floating point VMLA scalar */
6301 case 5: /* Floating point VMLS scalar */
6302 case 9: /* Floating point VMUL scalar */
6303 if (size == 1) {
6304 return 1;
6305 }
6306 /* fall through */
6307 case 0: /* Integer VMLA scalar */
6308 case 4: /* Integer VMLS scalar */
6309 case 8: /* Integer VMUL scalar */
6310 case 12: /* VQDMULH scalar */
6311 case 13: /* VQRDMULH scalar */
6312 if (u && ((rd | rn) & 1)) {
6313 return 1;
6314 }
6315 tmp = neon_get_scalar(size, rm);
6316 neon_store_scratch(0, tmp);
6317 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6318 tmp = neon_load_scratch(0);
6319 tmp2 = neon_load_reg(rn, pass);
6320 if (op == 12) {
6321 if (size == 1) {
6322 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6323 } else {
6324 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6325 }
6326 } else if (op == 13) {
6327 if (size == 1) {
6328 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6329 } else {
6330 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6331 }
6332 } else if (op & 1) {
6333 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6334 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6335 tcg_temp_free_ptr(fpstatus);
6336 } else {
6337 switch (size) {
6338 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6339 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6340 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6341 default: abort();
6342 }
6343 }
6344 tcg_temp_free_i32(tmp2);
6345 if (op < 8) {
6346 /* Accumulate. */
6347 tmp2 = neon_load_reg(rd, pass);
6348 switch (op) {
6349 case 0:
6350 gen_neon_add(size, tmp, tmp2);
6351 break;
6352 case 1:
6353 {
6354 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6355 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6356 tcg_temp_free_ptr(fpstatus);
6357 break;
6358 }
6359 case 4:
6360 gen_neon_rsb(size, tmp, tmp2);
6361 break;
6362 case 5:
6363 {
6364 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6365 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6366 tcg_temp_free_ptr(fpstatus);
6367 break;
6368 }
6369 default:
6370 abort();
6371 }
6372 tcg_temp_free_i32(tmp2);
6373 }
6374 neon_store_reg(rd, pass, tmp);
6375 }
6376 break;
6377 case 3: /* VQDMLAL scalar */
6378 case 7: /* VQDMLSL scalar */
6379 case 11: /* VQDMULL scalar */
6380 if (u == 1) {
6381 return 1;
6382 }
6383 /* fall through */
6384 case 2: /* VMLAL scalar */
6385 case 6: /* VMLSL scalar */
6386 case 10: /* VMULL scalar */
6387 if (rd & 1) {
6388 return 1;
6389 }
6390 tmp2 = neon_get_scalar(size, rm);
6391 /* We need a copy of tmp2 because gen_neon_mull
6392 * frees it during pass 0. */
6393 tmp4 = tcg_temp_new_i32();
6394 tcg_gen_mov_i32(tmp4, tmp2);
6395 tmp3 = neon_load_reg(rn, 1);
6396
6397 for (pass = 0; pass < 2; pass++) {
6398 if (pass == 0) {
6399 tmp = neon_load_reg(rn, 0);
6400 } else {
6401 tmp = tmp3;
6402 tmp2 = tmp4;
6403 }
6404 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6405 if (op != 11) {
6406 neon_load_reg64(cpu_V1, rd + pass);
6407 }
6408 switch (op) {
6409 case 6:
6410 gen_neon_negl(cpu_V0, size);
6411 /* Fall through */
6412 case 2:
6413 gen_neon_addl(size);
6414 break;
6415 case 3: case 7:
6416 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6417 if (op == 7) {
6418 gen_neon_negl(cpu_V0, size);
6419 }
6420 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6421 break;
6422 case 10:
6423 /* no-op */
6424 break;
6425 case 11:
6426 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6427 break;
6428 default:
6429 abort();
6430 }
6431 neon_store_reg64(cpu_V0, rd + pass);
6432 }
6433 break;
6434 case 14: /* VQRDMLAH scalar */
6435 case 15: /* VQRDMLSH scalar */
6436 {
6437 NeonGenThreeOpEnvFn *fn;
6438
6439 if (!dc_isar_feature(aa32_rdm, s)) {
6440 return 1;
6441 }
6442 if (u && ((rd | rn) & 1)) {
6443 return 1;
6444 }
6445 if (op == 14) {
6446 if (size == 1) {
6447 fn = gen_helper_neon_qrdmlah_s16;
6448 } else {
6449 fn = gen_helper_neon_qrdmlah_s32;
6450 }
6451 } else {
6452 if (size == 1) {
6453 fn = gen_helper_neon_qrdmlsh_s16;
6454 } else {
6455 fn = gen_helper_neon_qrdmlsh_s32;
6456 }
6457 }
6458
6459 tmp2 = neon_get_scalar(size, rm);
6460 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6461 tmp = neon_load_reg(rn, pass);
6462 tmp3 = neon_load_reg(rd, pass);
6463 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6464 tcg_temp_free_i32(tmp3);
6465 neon_store_reg(rd, pass, tmp);
6466 }
6467 tcg_temp_free_i32(tmp2);
6468 }
6469 break;
6470 default:
6471 g_assert_not_reached();
6472 }
6473 }
6474 } else { /* size == 3 */
6475 if (!u) {
6476 /* Extract. */
6477 imm = (insn >> 8) & 0xf;
6478
6479 if (imm > 7 && !q)
6480 return 1;
6481
6482 if (q && ((rd | rn | rm) & 1)) {
6483 return 1;
6484 }
6485
6486 if (imm == 0) {
6487 neon_load_reg64(cpu_V0, rn);
6488 if (q) {
6489 neon_load_reg64(cpu_V1, rn + 1);
6490 }
6491 } else if (imm == 8) {
6492 neon_load_reg64(cpu_V0, rn + 1);
6493 if (q) {
6494 neon_load_reg64(cpu_V1, rm);
6495 }
6496 } else if (q) {
6497 tmp64 = tcg_temp_new_i64();
6498 if (imm < 8) {
6499 neon_load_reg64(cpu_V0, rn);
6500 neon_load_reg64(tmp64, rn + 1);
6501 } else {
6502 neon_load_reg64(cpu_V0, rn + 1);
6503 neon_load_reg64(tmp64, rm);
6504 }
6505 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6506 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6507 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6508 if (imm < 8) {
6509 neon_load_reg64(cpu_V1, rm);
6510 } else {
6511 neon_load_reg64(cpu_V1, rm + 1);
6512 imm -= 8;
6513 }
6514 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6515 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6516 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6517 tcg_temp_free_i64(tmp64);
6518 } else {
6519 /* BUGFIX */
6520 neon_load_reg64(cpu_V0, rn);
6521 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6522 neon_load_reg64(cpu_V1, rm);
6523 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6524 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6525 }
6526 neon_store_reg64(cpu_V0, rd);
6527 if (q) {
6528 neon_store_reg64(cpu_V1, rd + 1);
6529 }
6530 } else if ((insn & (1 << 11)) == 0) {
6531 /* Two register misc. */
6532 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6533 size = (insn >> 18) & 3;
6534 /* UNDEF for unknown op values and bad op-size combinations */
6535 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6536 return 1;
6537 }
6538 if (neon_2rm_is_v8_op(op) &&
6539 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6540 return 1;
6541 }
6542 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6543 q && ((rm | rd) & 1)) {
6544 return 1;
6545 }
6546 switch (op) {
6547 case NEON_2RM_VREV64:
6548 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6549 tmp = neon_load_reg(rm, pass * 2);
6550 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6551 switch (size) {
6552 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6553 case 1: gen_swap_half(tmp); break;
6554 case 2: /* no-op */ break;
6555 default: abort();
6556 }
6557 neon_store_reg(rd, pass * 2 + 1, tmp);
6558 if (size == 2) {
6559 neon_store_reg(rd, pass * 2, tmp2);
6560 } else {
6561 switch (size) {
6562 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6563 case 1: gen_swap_half(tmp2); break;
6564 default: abort();
6565 }
6566 neon_store_reg(rd, pass * 2, tmp2);
6567 }
6568 }
6569 break;
6570 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6571 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6572 for (pass = 0; pass < q + 1; pass++) {
6573 tmp = neon_load_reg(rm, pass * 2);
6574 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6575 tmp = neon_load_reg(rm, pass * 2 + 1);
6576 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6577 switch (size) {
6578 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6579 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6580 case 2: tcg_gen_add_i64(CPU_V001); break;
6581 default: abort();
6582 }
6583 if (op >= NEON_2RM_VPADAL) {
6584 /* Accumulate. */
6585 neon_load_reg64(cpu_V1, rd + pass);
6586 gen_neon_addl(size);
6587 }
6588 neon_store_reg64(cpu_V0, rd + pass);
6589 }
6590 break;
6591 case NEON_2RM_VTRN:
6592 if (size == 2) {
6593 int n;
6594 for (n = 0; n < (q ? 4 : 2); n += 2) {
6595 tmp = neon_load_reg(rm, n);
6596 tmp2 = neon_load_reg(rd, n + 1);
6597 neon_store_reg(rm, n, tmp2);
6598 neon_store_reg(rd, n + 1, tmp);
6599 }
6600 } else {
6601 goto elementwise;
6602 }
6603 break;
6604 case NEON_2RM_VUZP:
6605 if (gen_neon_unzip(rd, rm, size, q)) {
6606 return 1;
6607 }
6608 break;
6609 case NEON_2RM_VZIP:
6610 if (gen_neon_zip(rd, rm, size, q)) {
6611 return 1;
6612 }
6613 break;
6614 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6615 /* also VQMOVUN; op field and mnemonics don't line up */
6616 if (rm & 1) {
6617 return 1;
6618 }
6619 tmp2 = NULL;
6620 for (pass = 0; pass < 2; pass++) {
6621 neon_load_reg64(cpu_V0, rm + pass);
6622 tmp = tcg_temp_new_i32();
6623 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6624 tmp, cpu_V0);
6625 if (pass == 0) {
6626 tmp2 = tmp;
6627 } else {
6628 neon_store_reg(rd, 0, tmp2);
6629 neon_store_reg(rd, 1, tmp);
6630 }
6631 }
6632 break;
6633 case NEON_2RM_VSHLL:
6634 if (q || (rd & 1)) {
6635 return 1;
6636 }
6637 tmp = neon_load_reg(rm, 0);
6638 tmp2 = neon_load_reg(rm, 1);
6639 for (pass = 0; pass < 2; pass++) {
6640 if (pass == 1)
6641 tmp = tmp2;
6642 gen_neon_widen(cpu_V0, tmp, size, 1);
6643 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6644 neon_store_reg64(cpu_V0, rd + pass);
6645 }
6646 break;
6647 case NEON_2RM_VCVT_F16_F32:
6648 {
6649 TCGv_ptr fpst;
6650 TCGv_i32 ahp;
6651
6652 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6653 q || (rm & 1)) {
6654 return 1;
6655 }
6656 fpst = get_fpstatus_ptr(true);
6657 ahp = get_ahp_flag();
6658 tmp = neon_load_reg(rm, 0);
6659 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6660 tmp2 = neon_load_reg(rm, 1);
6661 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6662 tcg_gen_shli_i32(tmp2, tmp2, 16);
6663 tcg_gen_or_i32(tmp2, tmp2, tmp);
6664 tcg_temp_free_i32(tmp);
6665 tmp = neon_load_reg(rm, 2);
6666 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6667 tmp3 = neon_load_reg(rm, 3);
6668 neon_store_reg(rd, 0, tmp2);
6669 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6670 tcg_gen_shli_i32(tmp3, tmp3, 16);
6671 tcg_gen_or_i32(tmp3, tmp3, tmp);
6672 neon_store_reg(rd, 1, tmp3);
6673 tcg_temp_free_i32(tmp);
6674 tcg_temp_free_i32(ahp);
6675 tcg_temp_free_ptr(fpst);
6676 break;
6677 }
6678 case NEON_2RM_VCVT_F32_F16:
6679 {
6680 TCGv_ptr fpst;
6681 TCGv_i32 ahp;
6682 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6683 q || (rd & 1)) {
6684 return 1;
6685 }
6686 fpst = get_fpstatus_ptr(true);
6687 ahp = get_ahp_flag();
6688 tmp3 = tcg_temp_new_i32();
6689 tmp = neon_load_reg(rm, 0);
6690 tmp2 = neon_load_reg(rm, 1);
6691 tcg_gen_ext16u_i32(tmp3, tmp);
6692 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6693 neon_store_reg(rd, 0, tmp3);
6694 tcg_gen_shri_i32(tmp, tmp, 16);
6695 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6696 neon_store_reg(rd, 1, tmp);
6697 tmp3 = tcg_temp_new_i32();
6698 tcg_gen_ext16u_i32(tmp3, tmp2);
6699 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6700 neon_store_reg(rd, 2, tmp3);
6701 tcg_gen_shri_i32(tmp2, tmp2, 16);
6702 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6703 neon_store_reg(rd, 3, tmp2);
6704 tcg_temp_free_i32(ahp);
6705 tcg_temp_free_ptr(fpst);
6706 break;
6707 }
6708 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6709 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6710 return 1;
6711 }
6712 ptr1 = vfp_reg_ptr(true, rd);
6713 ptr2 = vfp_reg_ptr(true, rm);
6714
6715 /* Bit 6 is the lowest opcode bit; it distinguishes between
6716 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6717 */
6718 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6719
6720 if (op == NEON_2RM_AESE) {
6721 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6722 } else {
6723 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6724 }
6725 tcg_temp_free_ptr(ptr1);
6726 tcg_temp_free_ptr(ptr2);
6727 tcg_temp_free_i32(tmp3);
6728 break;
6729 case NEON_2RM_SHA1H:
6730 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6731 return 1;
6732 }
6733 ptr1 = vfp_reg_ptr(true, rd);
6734 ptr2 = vfp_reg_ptr(true, rm);
6735
6736 gen_helper_crypto_sha1h(ptr1, ptr2);
6737
6738 tcg_temp_free_ptr(ptr1);
6739 tcg_temp_free_ptr(ptr2);
6740 break;
6741 case NEON_2RM_SHA1SU1:
6742 if ((rm | rd) & 1) {
6743 return 1;
6744 }
6745 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6746 if (q) {
6747 if (!dc_isar_feature(aa32_sha2, s)) {
6748 return 1;
6749 }
6750 } else if (!dc_isar_feature(aa32_sha1, s)) {
6751 return 1;
6752 }
6753 ptr1 = vfp_reg_ptr(true, rd);
6754 ptr2 = vfp_reg_ptr(true, rm);
6755 if (q) {
6756 gen_helper_crypto_sha256su0(ptr1, ptr2);
6757 } else {
6758 gen_helper_crypto_sha1su1(ptr1, ptr2);
6759 }
6760 tcg_temp_free_ptr(ptr1);
6761 tcg_temp_free_ptr(ptr2);
6762 break;
6763
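/* These two-reg-misc ops operate on the whole vector at once via the
 * gvec expanders instead of the per-pass loop used below.
 */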
6764 case NEON_2RM_VMVN:
6765 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6766 break;
6767 case NEON_2RM_VNEG:
6768 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6769 break;
6770 case NEON_2RM_VABS:
6771 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6772 break;
6773
6774 case NEON_2RM_VCEQ0:
6775 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6776 vec_size, &ceq0_op[size]);
6777 break;
6778 case NEON_2RM_VCGT0:
6779 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6780 vec_size, &cgt0_op[size]);
6781 break;
6782 case NEON_2RM_VCLE0:
6783 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6784 vec_size, &cle0_op[size]);
6785 break;
6786 case NEON_2RM_VCGE0:
6787 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6788 vec_size, &cge0_op[size]);
6789 break;
6790 case NEON_2RM_VCLT0:
6791 tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
6792 vec_size, &clt0_op[size]);
6793 break;
6794
6795 default:
6796 elementwise:
6797 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6798 tmp = neon_load_reg(rm, pass);
6799 switch (op) {
6800 case NEON_2RM_VREV32:
6801 switch (size) {
6802 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6803 case 1: gen_swap_half(tmp); break;
6804 default: abort();
6805 }
6806 break;
6807 case NEON_2RM_VREV16:
6808 gen_rev16(tmp, tmp);
6809 break;
6810 case NEON_2RM_VCLS:
6811 switch (size) {
6812 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6813 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6814 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6815 default: abort();
6816 }
6817 break;
6818 case NEON_2RM_VCLZ:
6819 switch (size) {
6820 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6821 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6822 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6823 default: abort();
6824 }
6825 break;
6826 case NEON_2RM_VCNT:
6827 gen_helper_neon_cnt_u8(tmp, tmp);
6828 break;
6829 case NEON_2RM_VQABS:
6830 switch (size) {
6831 case 0:
6832 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6833 break;
6834 case 1:
6835 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6836 break;
6837 case 2:
6838 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6839 break;
6840 default: abort();
6841 }
6842 break;
6843 case NEON_2RM_VQNEG:
6844 switch (size) {
6845 case 0:
6846 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6847 break;
6848 case 1:
6849 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6850 break;
6851 case 2:
6852 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6853 break;
6854 default: abort();
6855 }
6856 break;
6857 case NEON_2RM_VCGT0_F:
6858 {
6859 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6860 tmp2 = tcg_const_i32(0);
6861 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6862 tcg_temp_free_i32(tmp2);
6863 tcg_temp_free_ptr(fpstatus);
6864 break;
6865 }
6866 case NEON_2RM_VCGE0_F:
6867 {
6868 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6869 tmp2 = tcg_const_i32(0);
6870 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6871 tcg_temp_free_i32(tmp2);
6872 tcg_temp_free_ptr(fpstatus);
6873 break;
6874 }
6875 case NEON_2RM_VCEQ0_F:
6876 {
6877 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6878 tmp2 = tcg_const_i32(0);
6879 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6880 tcg_temp_free_i32(tmp2);
6881 tcg_temp_free_ptr(fpstatus);
6882 break;
6883 }
6884 case NEON_2RM_VCLE0_F:
6885 {
6886 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6887 tmp2 = tcg_const_i32(0);
6888 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6889 tcg_temp_free_i32(tmp2);
6890 tcg_temp_free_ptr(fpstatus);
6891 break;
6892 }
6893 case NEON_2RM_VCLT0_F:
6894 {
6895 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6896 tmp2 = tcg_const_i32(0);
6897 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6898 tcg_temp_free_i32(tmp2);
6899 tcg_temp_free_ptr(fpstatus);
6900 break;
6901 }
6902 case NEON_2RM_VABS_F:
6903 gen_helper_vfp_abss(tmp, tmp);
6904 break;
6905 case NEON_2RM_VNEG_F:
6906 gen_helper_vfp_negs(tmp, tmp);
6907 break;
6908 case NEON_2RM_VSWP:
6909 tmp2 = neon_load_reg(rd, pass);
6910 neon_store_reg(rm, pass, tmp2);
6911 break;
6912 case NEON_2RM_VTRN:
6913 tmp2 = neon_load_reg(rd, pass);
6914 switch (size) {
6915 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6916 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6917 default: abort();
6918 }
6919 neon_store_reg(rm, pass, tmp2);
6920 break;
6921 case NEON_2RM_VRINTN:
6922 case NEON_2RM_VRINTA:
6923 case NEON_2RM_VRINTM:
6924 case NEON_2RM_VRINTP:
6925 case NEON_2RM_VRINTZ:
6926 {
6927 TCGv_i32 tcg_rmode;
6928 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6929 int rmode;
6930
6931 if (op == NEON_2RM_VRINTZ) {
6932 rmode = FPROUNDING_ZERO;
6933 } else {
6934 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6935 }
6936
6937 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6938 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6939 cpu_env);
6940 gen_helper_rints(tmp, tmp, fpstatus);
6941 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6942 cpu_env);
6943 tcg_temp_free_ptr(fpstatus);
6944 tcg_temp_free_i32(tcg_rmode);
6945 break;
6946 }
6947 case NEON_2RM_VRINTX:
6948 {
6949 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6950 gen_helper_rints_exact(tmp, tmp, fpstatus);
6951 tcg_temp_free_ptr(fpstatus);
6952 break;
6953 }
6954 case NEON_2RM_VCVTAU:
6955 case NEON_2RM_VCVTAS:
6956 case NEON_2RM_VCVTNU:
6957 case NEON_2RM_VCVTNS:
6958 case NEON_2RM_VCVTPU:
6959 case NEON_2RM_VCVTPS:
6960 case NEON_2RM_VCVTMU:
6961 case NEON_2RM_VCVTMS:
6962 {
6963 bool is_signed = !extract32(insn, 7, 1);
6964 TCGv_ptr fpst = get_fpstatus_ptr(1);
6965 TCGv_i32 tcg_rmode, tcg_shift;
6966 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6967
6968 tcg_shift = tcg_const_i32(0);
6969 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6970 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6971 cpu_env);
6972
6973 if (is_signed) {
6974 gen_helper_vfp_tosls(tmp, tmp,
6975 tcg_shift, fpst);
6976 } else {
6977 gen_helper_vfp_touls(tmp, tmp,
6978 tcg_shift, fpst);
6979 }
6980
6981 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6982 cpu_env);
6983 tcg_temp_free_i32(tcg_rmode);
6984 tcg_temp_free_i32(tcg_shift);
6985 tcg_temp_free_ptr(fpst);
6986 break;
6987 }
6988 case NEON_2RM_VRECPE:
6989 {
6990 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6991 gen_helper_recpe_u32(tmp, tmp, fpstatus);
6992 tcg_temp_free_ptr(fpstatus);
6993 break;
6994 }
6995 case NEON_2RM_VRSQRTE:
6996 {
6997 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6998 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6999 tcg_temp_free_ptr(fpstatus);
7000 break;
7001 }
7002 case NEON_2RM_VRECPE_F:
7003 {
7004 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7005 gen_helper_recpe_f32(tmp, tmp, fpstatus);
7006 tcg_temp_free_ptr(fpstatus);
7007 break;
7008 }
7009 case NEON_2RM_VRSQRTE_F:
7010 {
7011 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7012 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
7013 tcg_temp_free_ptr(fpstatus);
7014 break;
7015 }
7016 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
7017 {
7018 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7019 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
7020 tcg_temp_free_ptr(fpstatus);
7021 break;
7022 }
7023 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
7024 {
7025 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7026 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
7027 tcg_temp_free_ptr(fpstatus);
7028 break;
7029 }
7030 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
7031 {
7032 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7033 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
7034 tcg_temp_free_ptr(fpstatus);
7035 break;
7036 }
7037 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7038 {
7039 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7040 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
7041 tcg_temp_free_ptr(fpstatus);
7042 break;
7043 }
7044 default:
7045 /* Reserved op values were caught by the
7046 * neon_2rm_sizes[] check earlier.
7047 */
7048 abort();
7049 }
7050 neon_store_reg(rd, pass, tmp);
7051 }
7052 break;
7053 }
7054 } else if ((insn & (1 << 10)) == 0) {
7055 /* VTBL, VTBX. */
7056 int n = ((insn >> 8) & 3) + 1;
7057 if ((rn + n) > 32) {
7058 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7059 * helper function running off the end of the register file.
7060 */
7061 return 1;
7062 }
7063 n <<= 3;
7064 if (insn & (1 << 6)) {
7065 tmp = neon_load_reg(rd, 0);
7066 } else {
7067 tmp = tcg_temp_new_i32();
7068 tcg_gen_movi_i32(tmp, 0);
7069 }
7070 tmp2 = neon_load_reg(rm, 0);
7071 ptr1 = vfp_reg_ptr(true, rn);
7072 tmp5 = tcg_const_i32(n);
7073 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
7074 tcg_temp_free_i32(tmp);
7075 if (insn & (1 << 6)) {
7076 tmp = neon_load_reg(rd, 1);
7077 } else {
7078 tmp = tcg_temp_new_i32();
7079 tcg_gen_movi_i32(tmp, 0);
7080 }
7081 tmp3 = neon_load_reg(rm, 1);
7082 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
7083 tcg_temp_free_i32(tmp5);
7084 tcg_temp_free_ptr(ptr1);
7085 neon_store_reg(rd, 0, tmp2);
7086 neon_store_reg(rd, 1, tmp3);
7087 tcg_temp_free_i32(tmp);
7088 } else if ((insn & 0x380) == 0) {
7089 /* VDUP */
7090 int element;
7091 MemOp size;
7092
7093 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7094 return 1;
7095 }
7096 if (insn & (1 << 16)) {
7097 size = MO_8;
7098 element = (insn >> 17) & 7;
7099 } else if (insn & (1 << 17)) {
7100 size = MO_16;
7101 element = (insn >> 18) & 3;
7102 } else {
7103 size = MO_32;
7104 element = (insn >> 19) & 1;
7105 }
7106 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
7107 neon_element_offset(rm, element, size),
7108 q ? 16 : 8, q ? 16 : 8);
7109 } else {
7110 return 1;
7111 }
7112 }
7113 }
7114 return 0;
7115 }
7116
7117 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7118 {
7119 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7120 const ARMCPRegInfo *ri;
7121
7122 cpnum = (insn >> 8) & 0xf;
7123
7124 /* First check for coprocessor space used for XScale/iwMMXt insns */
7125 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7126 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7127 return 1;
7128 }
7129 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7130 return disas_iwmmxt_insn(s, insn);
7131 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7132 return disas_dsp_insn(s, insn);
7133 }
7134 return 1;
7135 }
7136
7137 /* Otherwise treat as a generic register access */
7138 is64 = (insn & (1 << 25)) == 0;
7139 if (!is64 && ((insn & (1 << 4)) == 0)) {
7140 /* cdp */
7141 return 1;
7142 }
7143
7144 crm = insn & 0xf;
7145 if (is64) {
7146 crn = 0;
7147 opc1 = (insn >> 4) & 0xf;
7148 opc2 = 0;
7149 rt2 = (insn >> 16) & 0xf;
7150 } else {
7151 crn = (insn >> 16) & 0xf;
7152 opc1 = (insn >> 21) & 7;
7153 opc2 = (insn >> 5) & 7;
7154 rt2 = 0;
7155 }
7156 isread = (insn >> 20) & 1;
7157 rt = (insn >> 12) & 0xf;
7158
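/* Look the register up by its full (cp, is64, ns, crn, crm, opc1, opc2)
 * key; on a miss we fall through to the LOG_UNIMP diagnostics below.
 */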
7159 ri = get_arm_cp_reginfo(s->cp_regs,
7160 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7161 if (ri) {
7162 bool need_exit_tb;
7163
7164 /* Check access permissions */
7165 if (!cp_access_ok(s->current_el, ri, isread)) {
7166 return 1;
7167 }
7168
7169 if (s->hstr_active || ri->accessfn ||
7170 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7171 /* Emit code to perform further access permissions checks at
7172 * runtime; this may result in an exception.
7173 * Note that on XScale all cp0..cp13 registers do an access check
7174 * call in order to handle c15_cpar.
7175 */
7176 TCGv_ptr tmpptr;
7177 TCGv_i32 tcg_syn, tcg_isread;
7178 uint32_t syndrome;
7179
7180 /* Note that since we are an implementation which takes an
7181 * exception on a trapped conditional instruction only if the
7182 * instruction passes its condition code check, we can take
7183 * advantage of the clause in the ARM ARM that allows us to set
7184 * the COND field in the instruction to 0xE in all cases.
7185 * We could fish the actual condition out of the insn (ARM)
7186 * or the condexec bits (Thumb) but it isn't necessary.
7187 */
7188 switch (cpnum) {
7189 case 14:
7190 if (is64) {
7191 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7192 isread, false);
7193 } else {
7194 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7195 rt, isread, false);
7196 }
7197 break;
7198 case 15:
7199 if (is64) {
7200 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7201 isread, false);
7202 } else {
7203 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7204 rt, isread, false);
7205 }
7206 break;
7207 default:
7208 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7209 * so this can only happen if this is an ARMv7 or earlier CPU,
7210 * in which case the syndrome information won't actually be
7211 * guest visible.
7212 */
7213 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7214 syndrome = syn_uncategorized();
7215 break;
7216 }
7217
7218 gen_set_condexec(s);
7219 gen_set_pc_im(s, s->pc_curr);
7220 tmpptr = tcg_const_ptr(ri);
7221 tcg_syn = tcg_const_i32(syndrome);
7222 tcg_isread = tcg_const_i32(isread);
7223 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7224 tcg_isread);
7225 tcg_temp_free_ptr(tmpptr);
7226 tcg_temp_free_i32(tcg_syn);
7227 tcg_temp_free_i32(tcg_isread);
7228 } else if (ri->type & ARM_CP_RAISES_EXC) {
7229 /*
7230 * The readfn or writefn might raise an exception;
7231 * synchronize the CPU state in case it does.
7232 */
7233 gen_set_condexec(s);
7234 gen_set_pc_im(s, s->pc_curr);
7235 }
7236
7237 /* Handle special cases first */
7238 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7239 case ARM_CP_NOP:
7240 return 0;
7241 case ARM_CP_WFI:
7242 if (isread) {
7243 return 1;
7244 }
7245 gen_set_pc_im(s, s->base.pc_next);
7246 s->base.is_jmp = DISAS_WFI;
7247 return 0;
7248 default:
7249 break;
7250 }
7251
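/* gen_io_start() marks the start of an I/O access for icount; the
 * matching TB exit is requested below via need_exit_tb.
 */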
7252 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7253 gen_io_start();
7254 }
7255
7256 if (isread) {
7257 /* Read */
7258 if (is64) {
7259 TCGv_i64 tmp64;
7260 TCGv_i32 tmp;
7261 if (ri->type & ARM_CP_CONST) {
7262 tmp64 = tcg_const_i64(ri->resetvalue);
7263 } else if (ri->readfn) {
7264 TCGv_ptr tmpptr;
7265 tmp64 = tcg_temp_new_i64();
7266 tmpptr = tcg_const_ptr(ri);
7267 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7268 tcg_temp_free_ptr(tmpptr);
7269 } else {
7270 tmp64 = tcg_temp_new_i64();
7271 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7272 }
7273 tmp = tcg_temp_new_i32();
7274 tcg_gen_extrl_i64_i32(tmp, tmp64);
7275 store_reg(s, rt, tmp);
7276 tmp = tcg_temp_new_i32();
7277 tcg_gen_extrh_i64_i32(tmp, tmp64);
7278 tcg_temp_free_i64(tmp64);
7279 store_reg(s, rt2, tmp);
7280 } else {
7281 TCGv_i32 tmp;
7282 if (ri->type & ARM_CP_CONST) {
7283 tmp = tcg_const_i32(ri->resetvalue);
7284 } else if (ri->readfn) {
7285 TCGv_ptr tmpptr;
7286 tmp = tcg_temp_new_i32();
7287 tmpptr = tcg_const_ptr(ri);
7288 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7289 tcg_temp_free_ptr(tmpptr);
7290 } else {
7291 tmp = load_cpu_offset(ri->fieldoffset);
7292 }
7293 if (rt == 15) {
7294 /* A destination register of r15 for 32-bit loads sets
7295 * the condition codes from the high 4 bits of the value.
7296 */
7297 gen_set_nzcv(tmp);
7298 tcg_temp_free_i32(tmp);
7299 } else {
7300 store_reg(s, rt, tmp);
7301 }
7302 }
7303 } else {
7304 /* Write */
7305 if (ri->type & ARM_CP_CONST) {
7306 /* If not forbidden by access permissions, treat as WI */
7307 return 0;
7308 }
7309
7310 if (is64) {
7311 TCGv_i32 tmplo, tmphi;
7312 TCGv_i64 tmp64 = tcg_temp_new_i64();
7313 tmplo = load_reg(s, rt);
7314 tmphi = load_reg(s, rt2);
7315 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7316 tcg_temp_free_i32(tmplo);
7317 tcg_temp_free_i32(tmphi);
7318 if (ri->writefn) {
7319 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7320 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7321 tcg_temp_free_ptr(tmpptr);
7322 } else {
7323 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7324 }
7325 tcg_temp_free_i64(tmp64);
7326 } else {
7327 if (ri->writefn) {
7328 TCGv_i32 tmp;
7329 TCGv_ptr tmpptr;
7330 tmp = load_reg(s, rt);
7331 tmpptr = tcg_const_ptr(ri);
7332 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7333 tcg_temp_free_ptr(tmpptr);
7334 tcg_temp_free_i32(tmp);
7335 } else {
7336 TCGv_i32 tmp = load_reg(s, rt);
7337 store_cpu_offset(tmp, ri->fieldoffset);
7338 }
7339 }
7340 }
7341
7342 /* I/O operations must end the TB here (whether read or write) */
7343 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7344 (ri->type & ARM_CP_IO));
7345
7346 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7347 /*
7348 * A write to any coprocessor register that ends a TB
7349 * must rebuild the hflags for the next TB.
7350 */
7351 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7352 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7353 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7354 } else {
7355 if (ri->type & ARM_CP_NEWEL) {
7356 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7357 } else {
7358 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7359 }
7360 }
7361 tcg_temp_free_i32(tcg_el);
7362 /*
7363 * We default to ending the TB on a coprocessor register write,
7364 * but allow this to be suppressed by the register definition
7365 * (usually only necessary to work around guest bugs).
7366 */
7367 need_exit_tb = true;
7368 }
7369 if (need_exit_tb) {
7370 gen_lookup_tb(s);
7371 }
7372
7373 return 0;
7374 }
7375
7376 /* Unknown register; this might be a guest error or a QEMU
7377 * unimplemented feature.
7378 */
7379 if (is64) {
7380 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7381 "64 bit system register cp:%d opc1: %d crm:%d "
7382 "(%s)\n",
7383 isread ? "read" : "write", cpnum, opc1, crm,
7384 s->ns ? "non-secure" : "secure");
7385 } else {
7386 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7387 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7388 "(%s)\n",
7389 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7390 s->ns ? "non-secure" : "secure");
7391 }
7392
7393 return 1;
7394 }
7395
7396
7397 /* Store a 64-bit value to a register pair. Clobbers val. */
7398 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7399 {
7400 TCGv_i32 tmp;
7401 tmp = tcg_temp_new_i32();
7402 tcg_gen_extrl_i64_i32(tmp, val);
7403 store_reg(s, rlow, tmp);
7404 tmp = tcg_temp_new_i32();
7405 tcg_gen_extrh_i64_i32(tmp, val);
7406 store_reg(s, rhigh, tmp);
7407 }
7408
7409 /* Load and add a 64-bit value from a register pair. */
7410 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7411 {
7412 TCGv_i64 tmp;
7413 TCGv_i32 tmpl;
7414 TCGv_i32 tmph;
7415
7416 /* Load the 64-bit value rhigh:rlow. */
7417 tmpl = load_reg(s, rlow);
7418 tmph = load_reg(s, rhigh);
7419 tmp = tcg_temp_new_i64();
7420 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7421 tcg_temp_free_i32(tmpl);
7422 tcg_temp_free_i32(tmph);
7423 tcg_gen_add_i64(val, val, tmp);
7424 tcg_temp_free_i64(tmp);
7425 }
7426
7427 /* Set N and Z flags from hi|lo. */
7428 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7429 {
7430 tcg_gen_mov_i32(cpu_NF, hi);
7431 tcg_gen_or_i32(cpu_ZF, lo, hi);
7432 }
7433
7434 /* Load/Store exclusive instructions are implemented by remembering
7435 the value/address loaded, and seeing if these are the same
7436 when the store is performed. This should be sufficient to implement
7437 the architecturally mandated semantics, and avoids having to monitor
7438 regular stores. The compare vs the remembered value is done during
7439 the cmpxchg operation, but we must compare the addresses manually. */
7440 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7441 TCGv_i32 addr, int size)
7442 {
7443 TCGv_i32 tmp = tcg_temp_new_i32();
7444 MemOp opc = size | MO_ALIGN | s->be_data;
7445
7446 s->is_ldex = true;
7447
7448 if (size == 3) {
7449 TCGv_i32 tmp2 = tcg_temp_new_i32();
7450 TCGv_i64 t64 = tcg_temp_new_i64();
7451
7452 /* For AArch32, architecturally the 32-bit word at the lowest
7453 * address is always Rt and the one at addr+4 is Rt2, even if
7454 * the CPU is big-endian. That means we don't want to do a
7455 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7456 * for an architecturally 64-bit access, but instead do a
7457 * 64-bit access using MO_BE if appropriate and then split
7458 * the two halves.
7459 * This only makes a difference for BE32 user-mode, where
7460 * frob64() must not flip the two halves of the 64-bit data
7461 * but this code must treat BE32 user-mode like BE32 system.
7462 */
7463 TCGv taddr = gen_aa32_addr(s, addr, opc);
7464
7465 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7466 tcg_temp_free(taddr);
7467 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7468 if (s->be_data == MO_BE) {
7469 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7470 } else {
7471 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7472 }
7473 tcg_temp_free_i64(t64);
7474
7475 store_reg(s, rt2, tmp2);
7476 } else {
7477 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7478 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7479 }
7480
7481 store_reg(s, rt, tmp);
7482 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7483 }
7484
7485 static void gen_clrex(DisasContext *s)
7486 {
7487 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7488 }
7489
7490 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7491 TCGv_i32 addr, int size)
7492 {
7493 TCGv_i32 t0, t1, t2;
7494 TCGv_i64 extaddr;
7495 TCGv taddr;
7496 TCGLabel *done_label;
7497 TCGLabel *fail_label;
7498 MemOp opc = size | MO_ALIGN | s->be_data;
7499
7500 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7501 [addr] = {Rt};
7502 {Rd} = 0;
7503 } else {
7504 {Rd} = 1;
7505 } */
7506 fail_label = gen_new_label();
7507 done_label = gen_new_label();
7508 extaddr = tcg_temp_new_i64();
7509 tcg_gen_extu_i32_i64(extaddr, addr);
7510 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7511 tcg_temp_free_i64(extaddr);
7512
7513 taddr = gen_aa32_addr(s, addr, opc);
7514 t0 = tcg_temp_new_i32();
7515 t1 = load_reg(s, rt);
7516 if (size == 3) {
7517 TCGv_i64 o64 = tcg_temp_new_i64();
7518 TCGv_i64 n64 = tcg_temp_new_i64();
7519
7520 t2 = load_reg(s, rt2);
7521 /* For AArch32, architecturally the 32-bit word at the lowest
7522 * address is always Rt and the one at addr+4 is Rt2, even if
7523 * the CPU is big-endian. Since we're going to treat this as a
7524 * single 64-bit BE store, we need to put the two halves in the
7525 * opposite order for BE to LE, so that they end up in the right
7526 * places.
7527 * We don't want gen_aa32_frob64() because that does the wrong
7528 * thing for BE32 usermode.
7529 */
7530 if (s->be_data == MO_BE) {
7531 tcg_gen_concat_i32_i64(n64, t2, t1);
7532 } else {
7533 tcg_gen_concat_i32_i64(n64, t1, t2);
7534 }
7535 tcg_temp_free_i32(t2);
7536
7537 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7538 get_mem_index(s), opc);
7539 tcg_temp_free_i64(n64);
7540
7541 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7542 tcg_gen_extrl_i64_i32(t0, o64);
7543
7544 tcg_temp_free_i64(o64);
7545 } else {
7546 t2 = tcg_temp_new_i32();
7547 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7548 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7549 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7550 tcg_temp_free_i32(t2);
7551 }
7552 tcg_temp_free_i32(t1);
7553 tcg_temp_free(taddr);
7554 tcg_gen_mov_i32(cpu_R[rd], t0);
7555 tcg_temp_free_i32(t0);
7556 tcg_gen_br(done_label);
7557
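/* Address did not match the exclusive monitor: no store is attempted
 * and Rd reports failure (1).
 */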
7558 gen_set_label(fail_label);
7559 tcg_gen_movi_i32(cpu_R[rd], 1);
7560 gen_set_label(done_label);
7561 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7562 }
7563
7564 /* gen_srs:
7565 * @env: CPUARMState
7566 * @s: DisasContext
7567 * @mode: mode field from insn (which stack to store to)
7568 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7569 * @writeback: true if writeback bit set
7570 *
7571 * Generate code for the SRS (Store Return State) insn.
7572 */
7573 static void gen_srs(DisasContext *s,
7574 uint32_t mode, uint32_t amode, bool writeback)
7575 {
7576 int32_t offset;
7577 TCGv_i32 addr, tmp;
7578 bool undef = false;
7579
7580 /* SRS is:
7581 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7582 * and specified mode is monitor mode
7583 * - UNDEFINED in Hyp mode
7584 * - UNPREDICTABLE in User or System mode
7585 * - UNPREDICTABLE if the specified mode is:
7586 * -- not implemented
7587 * -- not a valid mode number
7588 * -- a mode that's at a higher exception level
7589 * -- Monitor, if we are Non-secure
7590 * For the UNPREDICTABLE cases we choose to UNDEF.
7591 */
7592 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7593 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7594 return;
7595 }
7596
7597 if (s->current_el == 0 || s->current_el == 2) {
7598 undef = true;
7599 }
7600
7601 switch (mode) {
7602 case ARM_CPU_MODE_USR:
7603 case ARM_CPU_MODE_FIQ:
7604 case ARM_CPU_MODE_IRQ:
7605 case ARM_CPU_MODE_SVC:
7606 case ARM_CPU_MODE_ABT:
7607 case ARM_CPU_MODE_UND:
7608 case ARM_CPU_MODE_SYS:
7609 break;
7610 case ARM_CPU_MODE_HYP:
7611 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7612 undef = true;
7613 }
7614 break;
7615 case ARM_CPU_MODE_MON:
7616 /* No need to check specifically for "are we non-secure" because
7617 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7618 * so if this isn't EL3 then we must be non-secure.
7619 */
7620 if (s->current_el != 3) {
7621 undef = true;
7622 }
7623 break;
7624 default:
7625 undef = true;
7626 }
7627
7628 if (undef) {
7629 unallocated_encoding(s);
7630 return;
7631 }
7632
7633 addr = tcg_temp_new_i32();
7634 tmp = tcg_const_i32(mode);
7635 /* get_r13_banked() will raise an exception if called from System mode */
7636 gen_set_condexec(s);
7637 gen_set_pc_im(s, s->pc_curr);
7638 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7639 tcg_temp_free_i32(tmp);
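/* Pick the address of the lower word of the two-word frame relative
 * to the banked SP for each addressing mode (DA/IA/DB/IB).
 */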
7640 switch (amode) {
7641 case 0: /* DA */
7642 offset = -4;
7643 break;
7644 case 1: /* IA */
7645 offset = 0;
7646 break;
7647 case 2: /* DB */
7648 offset = -8;
7649 break;
7650 case 3: /* IB */
7651 offset = 4;
7652 break;
7653 default:
7654 abort();
7655 }
7656 tcg_gen_addi_i32(addr, addr, offset);
7657 tmp = load_reg(s, 14);
7658 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7659 tcg_temp_free_i32(tmp);
7660 tmp = load_cpu_field(spsr);
7661 tcg_gen_addi_i32(addr, addr, 4);
7662 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7663 tcg_temp_free_i32(tmp);
7664 if (writeback) {
7665 switch (amode) {
7666 case 0:
7667 offset = -8;
7668 break;
7669 case 1:
7670 offset = 4;
7671 break;
7672 case 2:
7673 offset = -4;
7674 break;
7675 case 3:
7676 offset = 0;
7677 break;
7678 default:
7679 abort();
7680 }
7681 tcg_gen_addi_i32(addr, addr, offset);
7682 tmp = tcg_const_i32(mode);
7683 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7684 tcg_temp_free_i32(tmp);
7685 }
7686 tcg_temp_free_i32(addr);
7687 s->base.is_jmp = DISAS_UPDATE;
7688 }
7689
7690 /* Generate a label used for skipping this instruction */
7691 static void arm_gen_condlabel(DisasContext *s)
7692 {
7693 if (!s->condjmp) {
7694 s->condlabel = gen_new_label();
7695 s->condjmp = 1;
7696 }
7697 }
7698
7699 /* Skip this instruction if the ARM condition is false */
7700 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7701 {
7702 arm_gen_condlabel(s);
7703 arm_gen_test_cc(cond ^ 1, s->condlabel);
7704 }
7705
7706
7707 /*
7708 * Constant expanders for the decoders.
7709 */
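/* These helpers are invoked by the generated decoders included below,
 * where they are named via !function= modifiers in the .decode files.
 */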
7710
7711 static int negate(DisasContext *s, int x)
7712 {
7713 return -x;
7714 }
7715
7716 static int plus_2(DisasContext *s, int x)
7717 {
7718 return x + 2;
7719 }
7720
7721 static int times_2(DisasContext *s, int x)
7722 {
7723 return x * 2;
7724 }
7725
7726 static int times_4(DisasContext *s, int x)
7727 {
7728 return x * 4;
7729 }
7730
7731 /* Return only the rotation part of T32ExpandImm. */
7732 static int t32_expandimm_rot(DisasContext *s, int x)
7733 {
7734 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7735 }
7736
7737 /* Return the unrotated immediate from T32ExpandImm. */
7738 static int t32_expandimm_imm(DisasContext *s, int x)
7739 {
7740 int imm = extract32(x, 0, 8);
7741
7742 switch (extract32(x, 8, 4)) {
7743 case 0: /* XY */
7744 /* Nothing to do. */
7745 break;
7746 case 1: /* 00XY00XY */
7747 imm *= 0x00010001;
7748 break;
7749 case 2: /* XY00XY00 */
7750 imm *= 0x01000100;
7751 break;
7752 case 3: /* XYXYXYXY */
7753 imm *= 0x01010101;
7754 break;
7755 default:
7756 /* Rotated constant. */
7757 imm |= 0x80;
7758 break;
7759 }
7760 return imm;
7761 }
7762
7763 static int t32_branch24(DisasContext *s, int x)
7764 {
7765 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7766 x ^= !(x < 0) * (3 << 21);
7767 /* Append the final zero. */
7768 return x << 1;
7769 }
7770
7771 static int t16_setflags(DisasContext *s)
7772 {
7773 return s->condexec_mask == 0;
7774 }
7775
7776 static int t16_push_list(DisasContext *s, int x)
7777 {
7778 return (x & 0xff) | (x & 0x100) << (14 - 8);
7779 }
7780
7781 static int t16_pop_list(DisasContext *s, int x)
7782 {
7783 return (x & 0xff) | (x & 0x100) << (15 - 8);
7784 }
7785
7786 /*
7787 * Include the generated decoders.
7788 */
7789
7790 #include "decode-a32.inc.c"
7791 #include "decode-a32-uncond.inc.c"
7792 #include "decode-t32.inc.c"
7793 #include "decode-t16.inc.c"
7794
7795 /* Helpers to swap operands for reverse-subtract. */
7796 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7797 {
7798 tcg_gen_sub_i32(dst, b, a);
7799 }
7800
7801 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7802 {
7803 gen_sub_CC(dst, b, a);
7804 }
7805
7806 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7807 {
7808 gen_sub_carry(dest, b, a);
7809 }
7810
7811 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7812 {
7813 gen_sbc_CC(dest, b, a);
7814 }
7815
7816 /*
7817 * Helpers for the data processing routines.
7818 *
7819 * After the computation, store the result back. This may be
7820 * suppressed altogether (STREG_NONE), require a runtime check
7821 * against the stack limits (STREG_SP_CHECK), generate an exception
7822 * return (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
7823 *
7824 * Always return true, indicating success for a trans_* function.
7825 */
7826 typedef enum {
7827 STREG_NONE,
7828 STREG_NORMAL,
7829 STREG_SP_CHECK,
7830 STREG_EXC_RET,
7831 } StoreRegKind;
7832
7833 static bool store_reg_kind(DisasContext *s, int rd,
7834 TCGv_i32 val, StoreRegKind kind)
7835 {
7836 switch (kind) {
7837 case STREG_NONE:
7838 tcg_temp_free_i32(val);
7839 return true;
7840 case STREG_NORMAL:
7841 /* See ALUWritePC: Interworking only from a32 mode. */
7842 if (s->thumb) {
7843 store_reg(s, rd, val);
7844 } else {
7845 store_reg_bx(s, rd, val);
7846 }
7847 return true;
7848 case STREG_SP_CHECK:
7849 store_sp_checked(s, val);
7850 return true;
7851 case STREG_EXC_RET:
7852 gen_exception_return(s, val);
7853 return true;
7854 }
7855 g_assert_not_reached();
7856 }
7857
7858 /*
7859 * Data Processing (register)
7860 *
7861 * Operate, with set flags, one register source,
7862 * one immediate shifted register source, and a destination.
7863 */
7864 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7865 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7866 int logic_cc, StoreRegKind kind)
7867 {
7868 TCGv_i32 tmp1, tmp2;
7869
7870 tmp2 = load_reg(s, a->rm);
7871 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7872 tmp1 = load_reg(s, a->rn);
7873
7874 gen(tmp1, tmp1, tmp2);
7875 tcg_temp_free_i32(tmp2);
7876
7877 if (logic_cc) {
7878 gen_logic_CC(tmp1);
7879 }
7880 return store_reg_kind(s, a->rd, tmp1, kind);
7881 }
7882
7883 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7884 void (*gen)(TCGv_i32, TCGv_i32),
7885 int logic_cc, StoreRegKind kind)
7886 {
7887 TCGv_i32 tmp;
7888
7889 tmp = load_reg(s, a->rm);
7890 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7891
7892 gen(tmp, tmp);
7893 if (logic_cc) {
7894 gen_logic_CC(tmp);
7895 }
7896 return store_reg_kind(s, a->rd, tmp, kind);
7897 }
7898
7899 /*
7900 * Data-processing (register-shifted register)
7901 *
7902 * Operate, with set flags, one register source,
7903 * one register shifted register source, and a destination.
7904 */
7905 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7906 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7907 int logic_cc, StoreRegKind kind)
7908 {
7909 TCGv_i32 tmp1, tmp2;
7910
7911 tmp1 = load_reg(s, a->rs);
7912 tmp2 = load_reg(s, a->rm);
7913 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7914 tmp1 = load_reg(s, a->rn);
7915
7916 gen(tmp1, tmp1, tmp2);
7917 tcg_temp_free_i32(tmp2);
7918
7919 if (logic_cc) {
7920 gen_logic_CC(tmp1);
7921 }
7922 return store_reg_kind(s, a->rd, tmp1, kind);
7923 }
7924
7925 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7926 void (*gen)(TCGv_i32, TCGv_i32),
7927 int logic_cc, StoreRegKind kind)
7928 {
7929 TCGv_i32 tmp1, tmp2;
7930
7931 tmp1 = load_reg(s, a->rs);
7932 tmp2 = load_reg(s, a->rm);
7933 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7934
7935 gen(tmp2, tmp2);
7936 if (logic_cc) {
7937 gen_logic_CC(tmp2);
7938 }
7939 return store_reg_kind(s, a->rd, tmp2, kind);
7940 }
7941
7942 /*
7943 * Data-processing (immediate)
7944 *
7945 * Operate, optionally setting flags, on one register source,
7946 * one rotated immediate, and a destination.
7947 *
7948 * Note that logic_cc && a->rot setting CF based on the msb of the
7949 * immediate is the reason why we must pass in the unrotated form
7950 * of the immediate.
7951 */
7952 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7953 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7954 int logic_cc, StoreRegKind kind)
7955 {
7956 TCGv_i32 tmp1, tmp2;
7957 uint32_t imm;
7958
7959 imm = ror32(a->imm, a->rot);
7960 if (logic_cc && a->rot) {
7961 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7962 }
7963 tmp2 = tcg_const_i32(imm);
7964 tmp1 = load_reg(s, a->rn);
7965
7966 gen(tmp1, tmp1, tmp2);
7967 tcg_temp_free_i32(tmp2);
7968
7969 if (logic_cc) {
7970 gen_logic_CC(tmp1);
7971 }
7972 return store_reg_kind(s, a->rd, tmp1, kind);
7973 }
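/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the rotated-immediate expansion used above.  ror32() is re-implemented
 * locally just for the example; only standard C headers are assumed.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t ror32_demo(uint32_t x, unsigned r)
{
    return r ? (x >> r) | (x << (32 - r)) : x;
}

static void rotated_imm_demo(void)
{
    /* An 8-bit immediate 0xff rotated right by 8 becomes 0xff000000. */
    uint32_t imm = ror32_demo(0xff, 8);
    assert(imm == 0xff000000u);
    /* When the rotation is non-zero and the op sets flags logically,
     * CF is taken from bit 31 of the rotated immediate (here: 1). */
    assert((imm >> 31) == 1);
}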
7974
7975 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7976 void (*gen)(TCGv_i32, TCGv_i32),
7977 int logic_cc, StoreRegKind kind)
7978 {
7979 TCGv_i32 tmp;
7980 uint32_t imm;
7981
7982 imm = ror32(a->imm, a->rot);
7983 if (logic_cc && a->rot) {
7984 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7985 }
7986 tmp = tcg_const_i32(imm);
7987
7988 gen(tmp, tmp);
7989 if (logic_cc) {
7990 gen_logic_CC(tmp);
7991 }
7992 return store_reg_kind(s, a->rd, tmp, kind);
7993 }
7994
7995 #define DO_ANY3(NAME, OP, L, K) \
7996 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7997 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7998 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7999 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
8000 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
8001 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
8002
8003 #define DO_ANY2(NAME, OP, L, K) \
8004 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
8005 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
8006 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
8007 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
8008 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
8009 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
8010
8011 #define DO_CMP2(NAME, OP, L) \
8012 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
8013 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
8014 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
8015 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
8016 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
8017 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
8018
8019 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
8020 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
8021 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
8022 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
8023
8024 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
8025 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
8026 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
8027 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
8028
8029 DO_CMP2(TST, tcg_gen_and_i32, true)
8030 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
8031 DO_CMP2(CMN, gen_add_CC, false)
8032 DO_CMP2(CMP, gen_sub_CC, false)
8033
8034 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
8035 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
8036
8037 /*
8038 * Note that, in the computation of StoreRegKind below, we may return
8039 * out of the middle of the functions expanded by DO_ANY3, and that we
8040 * may modify a->s via that parameter before it is used by OP.
8041 */
8042 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
8043 ({
8044 StoreRegKind ret = STREG_NORMAL;
8045 if (a->rd == 15 && a->s) {
8046 /*
8047 * See ALUExceptionReturn:
8048 * In User mode, UNPREDICTABLE; we choose UNDEF.
8049 * In Hyp mode, UNDEFINED.
8050 */
8051 if (IS_USER(s) || s->current_el == 2) {
8052 unallocated_encoding(s);
8053 return true;
8054 }
8055 /* There is no writeback of nzcv to PSTATE. */
8056 a->s = 0;
8057 ret = STREG_EXC_RET;
8058 } else if (a->rd == 13 && a->rn == 13) {
8059 ret = STREG_SP_CHECK;
8060 }
8061 ret;
8062 }))
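/*
 * Editor's note: an illustrative sketch (not part of this file) of the
 * GNU C statement-expression used for the K argument above.  The value of
 * ({ ... }) is that of its final expression ("ret;"), and a plain "return"
 * inside it returns from the enclosing trans_* function, which is what the
 * comment above DO_ANY3(SUB, ...) refers to.
 */
static int streg_kind_demo(int rd, int rn)
{
    int kind = ({
        int ret = 0;                  /* default, like STREG_NORMAL   */
        if (rd == 13 && rn == 13) {
            ret = 1;                  /* like STREG_SP_CHECK          */
        }
        ret;                          /* value of the whole ({ ... }) */
    });
    return kind;
}
/* streg_kind_demo(13, 13) == 1; streg_kind_demo(0, 1) == 0. */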
8063
8064 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
8065 ({
8066 StoreRegKind ret = STREG_NORMAL;
8067 if (a->rd == 15 && a->s) {
8068 /*
8069 * See ALUExceptionReturn:
8070 * In User mode, UNPREDICTABLE; we choose UNDEF.
8071 * In Hyp mode, UNDEFINED.
8072 */
8073 if (IS_USER(s) || s->current_el == 2) {
8074 unallocated_encoding(s);
8075 return true;
8076 }
8077 /* There is no writeback of nzcv to PSTATE. */
8078 a->s = 0;
8079 ret = STREG_EXC_RET;
8080 } else if (a->rd == 13) {
8081 ret = STREG_SP_CHECK;
8082 }
8083 ret;
8084 }))
8085
8086 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
8087
8088 /*
8089 * ORN is only available with T32, so there is no register-shifted-register
8090 * form of the insn. Using the DO_ANY3 macro would create an unused function.
8091 */
8092 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
8093 {
8094 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8095 }
8096
8097 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
8098 {
8099 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8100 }
8101
8102 #undef DO_ANY3
8103 #undef DO_ANY2
8104 #undef DO_CMP2
8105
8106 static bool trans_ADR(DisasContext *s, arg_ri *a)
8107 {
8108 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
8109 return true;
8110 }
8111
8112 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
8113 {
8114 TCGv_i32 tmp;
8115
8116 if (!ENABLE_ARCH_6T2) {
8117 return false;
8118 }
8119
8120 tmp = tcg_const_i32(a->imm);
8121 store_reg(s, a->rd, tmp);
8122 return true;
8123 }
8124
8125 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
8126 {
8127 TCGv_i32 tmp;
8128
8129 if (!ENABLE_ARCH_6T2) {
8130 return false;
8131 }
8132
8133 tmp = load_reg(s, a->rd);
8134 tcg_gen_ext16u_i32(tmp, tmp);
8135 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
8136 store_reg(s, a->rd, tmp);
8137 return true;
8138 }
8139
8140 /*
8141 * Multiply and multiply accumulate
8142 */
8143
8144 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8145 {
8146 TCGv_i32 t1, t2;
8147
8148 t1 = load_reg(s, a->rn);
8149 t2 = load_reg(s, a->rm);
8150 tcg_gen_mul_i32(t1, t1, t2);
8151 tcg_temp_free_i32(t2);
8152 if (add) {
8153 t2 = load_reg(s, a->ra);
8154 tcg_gen_add_i32(t1, t1, t2);
8155 tcg_temp_free_i32(t2);
8156 }
8157 if (a->s) {
8158 gen_logic_CC(t1);
8159 }
8160 store_reg(s, a->rd, t1);
8161 return true;
8162 }
8163
8164 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8165 {
8166 return op_mla(s, a, false);
8167 }
8168
8169 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8170 {
8171 return op_mla(s, a, true);
8172 }
8173
8174 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8175 {
8176 TCGv_i32 t1, t2;
8177
8178 if (!ENABLE_ARCH_6T2) {
8179 return false;
8180 }
8181 t1 = load_reg(s, a->rn);
8182 t2 = load_reg(s, a->rm);
8183 tcg_gen_mul_i32(t1, t1, t2);
8184 tcg_temp_free_i32(t2);
8185 t2 = load_reg(s, a->ra);
8186 tcg_gen_sub_i32(t1, t2, t1);
8187 tcg_temp_free_i32(t2);
8188 store_reg(s, a->rd, t1);
8189 return true;
8190 }
8191
8192 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8193 {
8194 TCGv_i32 t0, t1, t2, t3;
8195
8196 t0 = load_reg(s, a->rm);
8197 t1 = load_reg(s, a->rn);
8198 if (uns) {
8199 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8200 } else {
8201 tcg_gen_muls2_i32(t0, t1, t0, t1);
8202 }
8203 if (add) {
8204 t2 = load_reg(s, a->ra);
8205 t3 = load_reg(s, a->rd);
8206 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8207 tcg_temp_free_i32(t2);
8208 tcg_temp_free_i32(t3);
8209 }
8210 if (a->s) {
8211 gen_logicq_cc(t0, t1);
8212 }
8213 store_reg(s, a->ra, t0);
8214 store_reg(s, a->rd, t1);
8215 return true;
8216 }
8217
8218 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8219 {
8220 return op_mlal(s, a, true, false);
8221 }
8222
8223 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8224 {
8225 return op_mlal(s, a, false, false);
8226 }
8227
8228 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8229 {
8230 return op_mlal(s, a, true, true);
8231 }
8232
8233 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8234 {
8235 return op_mlal(s, a, false, true);
8236 }
8237
8238 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8239 {
8240 TCGv_i32 t0, t1, t2, zero;
8241
8242 if (s->thumb
8243 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8244 : !ENABLE_ARCH_6) {
8245 return false;
8246 }
8247
8248 t0 = load_reg(s, a->rm);
8249 t1 = load_reg(s, a->rn);
8250 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8251 zero = tcg_const_i32(0);
8252 t2 = load_reg(s, a->ra);
8253 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8254 tcg_temp_free_i32(t2);
8255 t2 = load_reg(s, a->rd);
8256 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8257 tcg_temp_free_i32(t2);
8258 tcg_temp_free_i32(zero);
8259 store_reg(s, a->ra, t0);
8260 store_reg(s, a->rd, t1);
8261 return true;
8262 }
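/*
 * Editor's note: an illustrative, standalone sketch (not part of this file).
 * UMAAL computes Rm*Rn + Ra + Rd as a 64-bit value; even the extreme case
 * fits exactly, which is why the two add2 steps above need no extra carry
 * word:
 *   (2^32-1)*(2^32-1) + (2^32-1) + (2^32-1) == 2^64 - 1
 */
#include <assert.h>
#include <stdint.h>

static void umaal_range_demo(void)
{
    uint64_t m = UINT32_MAX;
    assert(m * m + m + m == UINT64_MAX);
}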
8263
8264 /*
8265 * Saturating addition and subtraction
8266 */
8267
8268 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8269 {
8270 TCGv_i32 t0, t1;
8271
8272 if (s->thumb
8273 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8274 : !ENABLE_ARCH_5TE) {
8275 return false;
8276 }
8277
8278 t0 = load_reg(s, a->rm);
8279 t1 = load_reg(s, a->rn);
8280 if (doub) {
8281 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8282 }
8283 if (add) {
8284 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8285 } else {
8286 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8287 }
8288 tcg_temp_free_i32(t1);
8289 store_reg(s, a->rd, t0);
8290 return true;
8291 }
8292
8293 #define DO_QADDSUB(NAME, ADD, DOUB) \
8294 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8295 { \
8296 return op_qaddsub(s, a, ADD, DOUB); \
8297 }
8298
8299 DO_QADDSUB(QADD, true, false)
8300 DO_QADDSUB(QSUB, false, false)
8301 DO_QADDSUB(QDADD, true, true)
8302 DO_QADDSUB(QDSUB, false, true)
8303
8304 #undef DO_QADDSUB
8305
8306 /*
8307 * Halfword multiply and multiply accumulate
8308 */
8309
8310 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8311 int add_long, bool nt, bool mt)
8312 {
8313 TCGv_i32 t0, t1, tl, th;
8314
8315 if (s->thumb
8316 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8317 : !ENABLE_ARCH_5TE) {
8318 return false;
8319 }
8320
8321 t0 = load_reg(s, a->rn);
8322 t1 = load_reg(s, a->rm);
8323 gen_mulxy(t0, t1, nt, mt);
8324 tcg_temp_free_i32(t1);
8325
8326 switch (add_long) {
8327 case 0:
8328 store_reg(s, a->rd, t0);
8329 break;
8330 case 1:
8331 t1 = load_reg(s, a->ra);
8332 gen_helper_add_setq(t0, cpu_env, t0, t1);
8333 tcg_temp_free_i32(t1);
8334 store_reg(s, a->rd, t0);
8335 break;
8336 case 2:
8337 tl = load_reg(s, a->ra);
8338 th = load_reg(s, a->rd);
8339 /* Sign-extend the 32-bit product to 64 bits. */
8340 t1 = tcg_temp_new_i32();
8341 tcg_gen_sari_i32(t1, t0, 31);
8342 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8343 tcg_temp_free_i32(t0);
8344 tcg_temp_free_i32(t1);
8345 store_reg(s, a->ra, tl);
8346 store_reg(s, a->rd, th);
8347 break;
8348 default:
8349 g_assert_not_reached();
8350 }
8351 return true;
8352 }
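/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the SMLAL* case above: the 32-bit product is widened to 64 bits by
 * pairing it with a sign word, which is what tcg_gen_sari_i32(t1, t0, 31)
 * produces.
 */
#include <assert.h>
#include <stdint.h>

static void signext_pair_demo(int32_t t0)
{
    uint32_t lo = (uint32_t)t0;
    uint32_t hi = t0 < 0 ? 0xffffffffu : 0;   /* == arithmetic t0 >> 31 */
    uint64_t pair = ((uint64_t)hi << 32) | lo;
    assert(pair == (uint64_t)(int64_t)t0);
}
/* e.g. signext_pair_demo(-5) and signext_pair_demo(7) both hold. */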
8353
8354 #define DO_SMLAX(NAME, add, nt, mt) \
8355 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8356 { \
8357 return op_smlaxxx(s, a, add, nt, mt); \
8358 }
8359
8360 DO_SMLAX(SMULBB, 0, 0, 0)
8361 DO_SMLAX(SMULBT, 0, 0, 1)
8362 DO_SMLAX(SMULTB, 0, 1, 0)
8363 DO_SMLAX(SMULTT, 0, 1, 1)
8364
8365 DO_SMLAX(SMLABB, 1, 0, 0)
8366 DO_SMLAX(SMLABT, 1, 0, 1)
8367 DO_SMLAX(SMLATB, 1, 1, 0)
8368 DO_SMLAX(SMLATT, 1, 1, 1)
8369
8370 DO_SMLAX(SMLALBB, 2, 0, 0)
8371 DO_SMLAX(SMLALBT, 2, 0, 1)
8372 DO_SMLAX(SMLALTB, 2, 1, 0)
8373 DO_SMLAX(SMLALTT, 2, 1, 1)
8374
8375 #undef DO_SMLAX
8376
8377 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8378 {
8379 TCGv_i32 t0, t1;
8380
8381 if (!ENABLE_ARCH_5TE) {
8382 return false;
8383 }
8384
8385 t0 = load_reg(s, a->rn);
8386 t1 = load_reg(s, a->rm);
8387 /*
8388 * Since the nominal result is product<47:16>, shift the 16-bit
8389 * input up by 16 bits, so that the result is at product<63:32>.
8390 */
8391 if (mt) {
8392 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8393 } else {
8394 tcg_gen_shli_i32(t1, t1, 16);
8395 }
8396 tcg_gen_muls2_i32(t0, t1, t0, t1);
8397 tcg_temp_free_i32(t0);
8398 if (add) {
8399 t0 = load_reg(s, a->ra);
8400 gen_helper_add_setq(t1, cpu_env, t1, t0);
8401 tcg_temp_free_i32(t0);
8402 }
8403 store_reg(s, a->rd, t1);
8404 return true;
8405 }
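/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the comment above.  Multiplying by the 16-bit operand pre-shifted into
 * the top half (the shli/andi above) puts product<47:16> into the top 32
 * bits of the 64-bit product, so the high word of muls2 is the result.
 * Shown with non-negative values in plain C; the TCG ops above handle the
 * signed case the same way.
 */
#include <assert.h>
#include <stdint.h>

static void smulw_demo(void)
{
    int32_t rn = 123456789;
    int32_t rm16 = 321;                            /* 16-bit operand      */
    int64_t narrow = (int64_t)rn * rm16;           /* 48-bit product      */
    int32_t want = (int32_t)(narrow >> 16);        /* product<47:16>      */
    int64_t wide = (int64_t)rn * (rm16 * 65536);   /* operand pre-shifted */
    int32_t got = (int32_t)(wide >> 32);           /* high half           */
    assert(got == want);
}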
8406
8407 #define DO_SMLAWX(NAME, add, mt) \
8408 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8409 { \
8410 return op_smlawx(s, a, add, mt); \
8411 }
8412
8413 DO_SMLAWX(SMULWB, 0, 0)
8414 DO_SMLAWX(SMULWT, 0, 1)
8415 DO_SMLAWX(SMLAWB, 1, 0)
8416 DO_SMLAWX(SMLAWT, 1, 1)
8417
8418 #undef DO_SMLAWX
8419
8420 /*
8421 * MSR (immediate) and hints
8422 */
8423
8424 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8425 {
8426 /*
8427 * When running single-threaded TCG code, use the helper to ensure that
8428 * the next round-robin scheduled vCPU gets a chance to run. When running
8429 * in MTTCG mode we don't generate jumps to the helper, as it won't affect
8430 * scheduling of other vCPUs.
8431 */
8432 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8433 gen_set_pc_im(s, s->base.pc_next);
8434 s->base.is_jmp = DISAS_YIELD;
8435 }
8436 return true;
8437 }
8438
8439 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8440 {
8441 /*
8442 * When running single-threaded TCG code, use the helper to ensure that
8443 * the next round-robin scheduled vCPU gets a chance to run. In MTTCG mode
8444 * we just skip this instruction. Currently the SEV/SEVL instructions,
8445 * which provide *one* of the many ways to wake the CPU from WFE, are not
8446 * implemented, so we cannot sleep the way WFI does.
8447 */
8448 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8449 gen_set_pc_im(s, s->base.pc_next);
8450 s->base.is_jmp = DISAS_WFE;
8451 }
8452 return true;
8453 }
8454
8455 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8456 {
8457 /* For WFI, halt the vCPU until an IRQ. */
8458 gen_set_pc_im(s, s->base.pc_next);
8459 s->base.is_jmp = DISAS_WFI;
8460 return true;
8461 }
8462
8463 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8464 {
8465 return true;
8466 }
8467
8468 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8469 {
8470 uint32_t val = ror32(a->imm, a->rot * 2);
8471 uint32_t mask = msr_mask(s, a->mask, a->r);
8472
8473 if (gen_set_psr_im(s, mask, a->r, val)) {
8474 unallocated_encoding(s);
8475 }
8476 return true;
8477 }
8478
8479 /*
8480 * Cyclic Redundancy Check
8481 */
8482
8483 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8484 {
8485 TCGv_i32 t1, t2, t3;
8486
8487 if (!dc_isar_feature(aa32_crc32, s)) {
8488 return false;
8489 }
8490
8491 t1 = load_reg(s, a->rn);
8492 t2 = load_reg(s, a->rm);
8493 switch (sz) {
8494 case MO_8:
8495 gen_uxtb(t2);
8496 break;
8497 case MO_16:
8498 gen_uxth(t2);
8499 break;
8500 case MO_32:
8501 break;
8502 default:
8503 g_assert_not_reached();
8504 }
8505 t3 = tcg_const_i32(1 << sz);
8506 if (c) {
8507 gen_helper_crc32c(t1, t1, t2, t3);
8508 } else {
8509 gen_helper_crc32(t1, t1, t2, t3);
8510 }
8511 tcg_temp_free_i32(t2);
8512 tcg_temp_free_i32(t3);
8513 store_reg(s, a->rd, t1);
8514 return true;
8515 }
8516
8517 #define DO_CRC32(NAME, c, sz) \
8518 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8519 { return op_crc32(s, a, c, sz); }
8520
8521 DO_CRC32(CRC32B, false, MO_8)
8522 DO_CRC32(CRC32H, false, MO_16)
8523 DO_CRC32(CRC32W, false, MO_32)
8524 DO_CRC32(CRC32CB, true, MO_8)
8525 DO_CRC32(CRC32CH, true, MO_16)
8526 DO_CRC32(CRC32CW, true, MO_32)
8527
8528 #undef DO_CRC32
8529
8530 /*
8531 * Miscellaneous instructions
8532 */
8533
8534 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8535 {
8536 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8537 return false;
8538 }
8539 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8540 return true;
8541 }
8542
8543 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8544 {
8545 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8546 return false;
8547 }
8548 gen_msr_banked(s, a->r, a->sysm, a->rn);
8549 return true;
8550 }
8551
8552 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8553 {
8554 TCGv_i32 tmp;
8555
8556 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8557 return false;
8558 }
8559 if (a->r) {
8560 if (IS_USER(s)) {
8561 unallocated_encoding(s);
8562 return true;
8563 }
8564 tmp = load_cpu_field(spsr);
8565 } else {
8566 tmp = tcg_temp_new_i32();
8567 gen_helper_cpsr_read(tmp, cpu_env);
8568 }
8569 store_reg(s, a->rd, tmp);
8570 return true;
8571 }
8572
8573 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8574 {
8575 TCGv_i32 tmp;
8576 uint32_t mask = msr_mask(s, a->mask, a->r);
8577
8578 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8579 return false;
8580 }
8581 tmp = load_reg(s, a->rn);
8582 if (gen_set_psr(s, mask, a->r, tmp)) {
8583 unallocated_encoding(s);
8584 }
8585 return true;
8586 }
8587
8588 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8589 {
8590 TCGv_i32 tmp;
8591
8592 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8593 return false;
8594 }
8595 tmp = tcg_const_i32(a->sysm);
8596 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8597 store_reg(s, a->rd, tmp);
8598 return true;
8599 }
8600
8601 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8602 {
8603 TCGv_i32 addr, reg;
8604
8605 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8606 return false;
8607 }
8608 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8609 reg = load_reg(s, a->rn);
8610 gen_helper_v7m_msr(cpu_env, addr, reg);
8611 tcg_temp_free_i32(addr);
8612 tcg_temp_free_i32(reg);
8613 /* If we wrote to CONTROL, the EL might have changed */
8614 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8615 gen_lookup_tb(s);
8616 return true;
8617 }
8618
8619 static bool trans_BX(DisasContext *s, arg_BX *a)
8620 {
8621 if (!ENABLE_ARCH_4T) {
8622 return false;
8623 }
8624 gen_bx_excret(s, load_reg(s, a->rm));
8625 return true;
8626 }
8627
8628 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8629 {
8630 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8631 return false;
8632 }
8633 /* Trivial implementation equivalent to bx. */
8634 gen_bx(s, load_reg(s, a->rm));
8635 return true;
8636 }
8637
8638 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8639 {
8640 TCGv_i32 tmp;
8641
8642 if (!ENABLE_ARCH_5) {
8643 return false;
8644 }
8645 tmp = load_reg(s, a->rm);
8646 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8647 gen_bx(s, tmp);
8648 return true;
8649 }
8650
8651 /*
8652 * BXNS/BLXNS: only exist for v8M with the security extensions,
8653 * and always UNDEF if NonSecure. We don't implement these in
8654 * the user-only mode either (in theory you can use them from
8655 * Secure User mode, but they are too closely tied to system emulation).
8656 */
8657 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8658 {
8659 if (!s->v8m_secure || IS_USER_ONLY) {
8660 unallocated_encoding(s);
8661 } else {
8662 gen_bxns(s, a->rm);
8663 }
8664 return true;
8665 }
8666
8667 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8668 {
8669 if (!s->v8m_secure || IS_USER_ONLY) {
8670 unallocated_encoding(s);
8671 } else {
8672 gen_blxns(s, a->rm);
8673 }
8674 return true;
8675 }
8676
8677 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8678 {
8679 TCGv_i32 tmp;
8680
8681 if (!ENABLE_ARCH_5) {
8682 return false;
8683 }
8684 tmp = load_reg(s, a->rm);
8685 tcg_gen_clzi_i32(tmp, tmp, 32);
8686 store_reg(s, a->rd, tmp);
8687 return true;
8688 }
8689
8690 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8691 {
8692 TCGv_i32 tmp;
8693
8694 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8695 return false;
8696 }
8697 if (IS_USER(s)) {
8698 unallocated_encoding(s);
8699 return true;
8700 }
8701 if (s->current_el == 2) {
8702 /* ERET from Hyp uses ELR_Hyp, not LR */
8703 tmp = load_cpu_field(elr_el[2]);
8704 } else {
8705 tmp = load_reg(s, 14);
8706 }
8707 gen_exception_return(s, tmp);
8708 return true;
8709 }
8710
8711 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8712 {
8713 gen_hlt(s, a->imm);
8714 return true;
8715 }
8716
8717 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8718 {
8719 if (!ENABLE_ARCH_5) {
8720 return false;
8721 }
8722 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8723 semihosting_enabled() &&
8724 #ifndef CONFIG_USER_ONLY
8725 !IS_USER(s) &&
8726 #endif
8727 (a->imm == 0xab)) {
8728 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8729 } else {
8730 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8731 }
8732 return true;
8733 }
8734
8735 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8736 {
8737 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8738 return false;
8739 }
8740 if (IS_USER(s)) {
8741 unallocated_encoding(s);
8742 } else {
8743 gen_hvc(s, a->imm);
8744 }
8745 return true;
8746 }
8747
8748 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8749 {
8750 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8751 return false;
8752 }
8753 if (IS_USER(s)) {
8754 unallocated_encoding(s);
8755 } else {
8756 gen_smc(s);
8757 }
8758 return true;
8759 }
8760
8761 static bool trans_SG(DisasContext *s, arg_SG *a)
8762 {
8763 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8764 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8765 return false;
8766 }
8767 /*
8768 * SG (v8M only)
8769 * The bulk of the behaviour for this instruction is implemented
8770 * in v7m_handle_execute_nsc(), which deals with the insn when
8771 * it is executed by a CPU in non-secure state from memory
8772 * which is Secure & NonSecure-Callable.
8773 * Here we only need to handle the remaining cases:
8774 * * in NS memory (including the "security extension not
8775 * implemented" case) : NOP
8776 * * in S memory but CPU already secure (clear IT bits)
8777 * We know that the attribute for the memory this insn is
8778 * in must match the current CPU state, because otherwise
8779 * get_phys_addr_pmsav8 would have generated an exception.
8780 */
8781 if (s->v8m_secure) {
8782 /* Like the IT insn, we don't need to generate any code */
8783 s->condexec_cond = 0;
8784 s->condexec_mask = 0;
8785 }
8786 return true;
8787 }
8788
8789 static bool trans_TT(DisasContext *s, arg_TT *a)
8790 {
8791 TCGv_i32 addr, tmp;
8792
8793 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8794 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8795 return false;
8796 }
8797 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8798 /* We UNDEF for these UNPREDICTABLE cases */
8799 unallocated_encoding(s);
8800 return true;
8801 }
8802 if (a->A && !s->v8m_secure) {
8803 /* This case is UNDEFINED. */
8804 unallocated_encoding(s);
8805 return true;
8806 }
8807
8808 addr = load_reg(s, a->rn);
8809 tmp = tcg_const_i32((a->A << 1) | a->T);
8810 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8811 tcg_temp_free_i32(addr);
8812 store_reg(s, a->rd, tmp);
8813 return true;
8814 }
8815
8816 /*
8817 * Load/store register index
8818 */
8819
8820 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8821 {
8822 ISSInfo ret;
8823
8824 /* ISS not valid if writeback */
8825 if (p && !w) {
8826 ret = rd;
8827 if (s->base.pc_next - s->pc_curr == 2) {
8828 ret |= ISSIs16Bit;
8829 }
8830 } else {
8831 ret = ISSInvalid;
8832 }
8833 return ret;
8834 }
8835
8836 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8837 {
8838 TCGv_i32 addr = load_reg(s, a->rn);
8839
8840 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8841 gen_helper_v8m_stackcheck(cpu_env, addr);
8842 }
8843
8844 if (a->p) {
8845 TCGv_i32 ofs = load_reg(s, a->rm);
8846 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8847 if (a->u) {
8848 tcg_gen_add_i32(addr, addr, ofs);
8849 } else {
8850 tcg_gen_sub_i32(addr, addr, ofs);
8851 }
8852 tcg_temp_free_i32(ofs);
8853 }
8854 return addr;
8855 }
8856
8857 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8858 TCGv_i32 addr, int address_offset)
8859 {
8860 if (!a->p) {
8861 TCGv_i32 ofs = load_reg(s, a->rm);
8862 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8863 if (a->u) {
8864 tcg_gen_add_i32(addr, addr, ofs);
8865 } else {
8866 tcg_gen_sub_i32(addr, addr, ofs);
8867 }
8868 tcg_temp_free_i32(ofs);
8869 } else if (!a->w) {
8870 tcg_temp_free_i32(addr);
8871 return;
8872 }
8873 tcg_gen_addi_i32(addr, addr, address_offset);
8874 store_reg(s, a->rn, addr);
8875 }
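/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the pre/post-index and writeback split that op_addr_rr_pre/_post (and
 * the _ri variants below) implement.  P selects pre- vs post-indexing, U
 * add vs subtract, W base writeback; post-indexed forms always write the
 * base back.  The extra address_offset used by LDRD/STRD is ignored here.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint32_t access_addr;   /* address used by the load/store         */
    uint32_t new_base;      /* value written back to Rn, if writeback */
    bool     writeback;
} AddrModeDemo;

static AddrModeDemo addr_mode_demo(uint32_t rn, uint32_t ofs,
                                   bool p, bool u, bool w)
{
    uint32_t delta = u ? ofs : 0u - ofs;
    AddrModeDemo d;
    d.access_addr = p ? rn + delta : rn;
    d.new_base    = rn + delta;
    d.writeback   = !p || w;
    return d;
}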
8876
8877 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8878 MemOp mop, int mem_idx)
8879 {
8880 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8881 TCGv_i32 addr, tmp;
8882
8883 addr = op_addr_rr_pre(s, a);
8884
8885 tmp = tcg_temp_new_i32();
8886 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8887 disas_set_da_iss(s, mop, issinfo);
8888
8889 /*
8890 * Perform the base writeback before writing the loaded value, to
8891 * ensure correct behavior when the base and destination registers overlap.
8892 */
8893 op_addr_rr_post(s, a, addr, 0);
8894 store_reg_from_load(s, a->rt, tmp);
8895 return true;
8896 }
8897
8898 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8899 MemOp mop, int mem_idx)
8900 {
8901 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8902 TCGv_i32 addr, tmp;
8903
8904 addr = op_addr_rr_pre(s, a);
8905
8906 tmp = load_reg(s, a->rt);
8907 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8908 disas_set_da_iss(s, mop, issinfo);
8909 tcg_temp_free_i32(tmp);
8910
8911 op_addr_rr_post(s, a, addr, 0);
8912 return true;
8913 }
8914
8915 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8916 {
8917 int mem_idx = get_mem_index(s);
8918 TCGv_i32 addr, tmp;
8919
8920 if (!ENABLE_ARCH_5TE) {
8921 return false;
8922 }
8923 if (a->rt & 1) {
8924 unallocated_encoding(s);
8925 return true;
8926 }
8927 addr = op_addr_rr_pre(s, a);
8928
8929 tmp = tcg_temp_new_i32();
8930 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8931 store_reg(s, a->rt, tmp);
8932
8933 tcg_gen_addi_i32(addr, addr, 4);
8934
8935 tmp = tcg_temp_new_i32();
8936 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8937 store_reg(s, a->rt + 1, tmp);
8938
8939 /* LDRD w/ base writeback is undefined if the registers overlap. */
8940 op_addr_rr_post(s, a, addr, -4);
8941 return true;
8942 }
8943
8944 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8945 {
8946 int mem_idx = get_mem_index(s);
8947 TCGv_i32 addr, tmp;
8948
8949 if (!ENABLE_ARCH_5TE) {
8950 return false;
8951 }
8952 if (a->rt & 1) {
8953 unallocated_encoding(s);
8954 return true;
8955 }
8956 addr = op_addr_rr_pre(s, a);
8957
8958 tmp = load_reg(s, a->rt);
8959 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8960 tcg_temp_free_i32(tmp);
8961
8962 tcg_gen_addi_i32(addr, addr, 4);
8963
8964 tmp = load_reg(s, a->rt + 1);
8965 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8966 tcg_temp_free_i32(tmp);
8967
8968 op_addr_rr_post(s, a, addr, -4);
8969 return true;
8970 }
8971
8972 /*
8973 * Load/store immediate index
8974 */
8975
8976 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8977 {
8978 int ofs = a->imm;
8979
8980 if (!a->u) {
8981 ofs = -ofs;
8982 }
8983
8984 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8985 /*
8986 * Stackcheck. Here we know 'addr' is the current SP;
8987 * U is set if we're moving SP up, else down. It is
8988 * UNKNOWN whether the limit check triggers when SP starts
8989 * below the limit and ends up above it; we choose to trigger it.
8990 */
8991 if (!a->u) {
8992 TCGv_i32 newsp = tcg_temp_new_i32();
8993 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8994 gen_helper_v8m_stackcheck(cpu_env, newsp);
8995 tcg_temp_free_i32(newsp);
8996 } else {
8997 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8998 }
8999 }
9000
9001 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
9002 }
9003
9004 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
9005 TCGv_i32 addr, int address_offset)
9006 {
9007 if (!a->p) {
9008 if (a->u) {
9009 address_offset += a->imm;
9010 } else {
9011 address_offset -= a->imm;
9012 }
9013 } else if (!a->w) {
9014 tcg_temp_free_i32(addr);
9015 return;
9016 }
9017 tcg_gen_addi_i32(addr, addr, address_offset);
9018 store_reg(s, a->rn, addr);
9019 }
9020
9021 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
9022 MemOp mop, int mem_idx)
9023 {
9024 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
9025 TCGv_i32 addr, tmp;
9026
9027 addr = op_addr_ri_pre(s, a);
9028
9029 tmp = tcg_temp_new_i32();
9030 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
9031 disas_set_da_iss(s, mop, issinfo);
9032
9033 /*
9034 * Perform the base writeback before writing the loaded value, to
9035 * ensure correct behavior when the base and destination registers overlap.
9036 */
9037 op_addr_ri_post(s, a, addr, 0);
9038 store_reg_from_load(s, a->rt, tmp);
9039 return true;
9040 }
9041
9042 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
9043 MemOp mop, int mem_idx)
9044 {
9045 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
9046 TCGv_i32 addr, tmp;
9047
9048 addr = op_addr_ri_pre(s, a);
9049
9050 tmp = load_reg(s, a->rt);
9051 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
9052 disas_set_da_iss(s, mop, issinfo);
9053 tcg_temp_free_i32(tmp);
9054
9055 op_addr_ri_post(s, a, addr, 0);
9056 return true;
9057 }
9058
9059 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9060 {
9061 int mem_idx = get_mem_index(s);
9062 TCGv_i32 addr, tmp;
9063
9064 addr = op_addr_ri_pre(s, a);
9065
9066 tmp = tcg_temp_new_i32();
9067 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9068 store_reg(s, a->rt, tmp);
9069
9070 tcg_gen_addi_i32(addr, addr, 4);
9071
9072 tmp = tcg_temp_new_i32();
9073 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9074 store_reg(s, rt2, tmp);
9075
9076 /* LDRD w/ base writeback is undefined if the registers overlap. */
9077 op_addr_ri_post(s, a, addr, -4);
9078 return true;
9079 }
9080
9081 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9082 {
9083 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9084 return false;
9085 }
9086 return op_ldrd_ri(s, a, a->rt + 1);
9087 }
9088
9089 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9090 {
9091 arg_ldst_ri b = {
9092 .u = a->u, .w = a->w, .p = a->p,
9093 .rn = a->rn, .rt = a->rt, .imm = a->imm
9094 };
9095 return op_ldrd_ri(s, &b, a->rt2);
9096 }
9097
9098 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9099 {
9100 int mem_idx = get_mem_index(s);
9101 TCGv_i32 addr, tmp;
9102
9103 addr = op_addr_ri_pre(s, a);
9104
9105 tmp = load_reg(s, a->rt);
9106 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9107 tcg_temp_free_i32(tmp);
9108
9109 tcg_gen_addi_i32(addr, addr, 4);
9110
9111 tmp = load_reg(s, rt2);
9112 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9113 tcg_temp_free_i32(tmp);
9114
9115 op_addr_ri_post(s, a, addr, -4);
9116 return true;
9117 }
9118
9119 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9120 {
9121 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9122 return false;
9123 }
9124 return op_strd_ri(s, a, a->rt + 1);
9125 }
9126
9127 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9128 {
9129 arg_ldst_ri b = {
9130 .u = a->u, .w = a->w, .p = a->p,
9131 .rn = a->rn, .rt = a->rt, .imm = a->imm
9132 };
9133 return op_strd_ri(s, &b, a->rt2);
9134 }
9135
9136 #define DO_LDST(NAME, WHICH, MEMOP) \
9137 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
9138 { \
9139 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9140 } \
9141 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9142 { \
9143 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9144 } \
9145 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9146 { \
9147 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9148 } \
9149 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9150 { \
9151 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9152 }
9153
9154 DO_LDST(LDR, load, MO_UL)
9155 DO_LDST(LDRB, load, MO_UB)
9156 DO_LDST(LDRH, load, MO_UW)
9157 DO_LDST(LDRSB, load, MO_SB)
9158 DO_LDST(LDRSH, load, MO_SW)
9159
9160 DO_LDST(STR, store, MO_UL)
9161 DO_LDST(STRB, store, MO_UB)
9162 DO_LDST(STRH, store, MO_UW)
9163
9164 #undef DO_LDST
9165
9166 /*
9167 * Synchronization primitives
9168 */
9169
9170 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9171 {
9172 TCGv_i32 addr, tmp;
9173 TCGv taddr;
9174
9175 opc |= s->be_data;
9176 addr = load_reg(s, a->rn);
9177 taddr = gen_aa32_addr(s, addr, opc);
9178 tcg_temp_free_i32(addr);
9179
9180 tmp = load_reg(s, a->rt2);
9181 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9182 tcg_temp_free(taddr);
9183
9184 store_reg(s, a->rt, tmp);
9185 return true;
9186 }
9187
9188 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9189 {
9190 return op_swp(s, a, MO_UL | MO_ALIGN);
9191 }
9192
9193 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9194 {
9195 return op_swp(s, a, MO_UB);
9196 }
9197
9198 /*
9199 * Load/Store Exclusive and Load-Acquire/Store-Release
9200 */
9201
9202 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9203 {
9204 TCGv_i32 addr;
9205 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9206 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9207
9208 /* We UNDEF for these UNPREDICTABLE cases. */
9209 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9210 || a->rd == a->rn || a->rd == a->rt
9211 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9212 || (mop == MO_64
9213 && (a->rt2 == 15
9214 || a->rd == a->rt2
9215 || (!v8a && s->thumb && a->rt2 == 13)))) {
9216 unallocated_encoding(s);
9217 return true;
9218 }
9219
9220 if (rel) {
9221 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9222 }
9223
9224 addr = tcg_temp_local_new_i32();
9225 load_reg_var(s, addr, a->rn);
9226 tcg_gen_addi_i32(addr, addr, a->imm);
9227
9228 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9229 tcg_temp_free_i32(addr);
9230 return true;
9231 }
9232
9233 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9234 {
9235 if (!ENABLE_ARCH_6) {
9236 return false;
9237 }
9238 return op_strex(s, a, MO_32, false);
9239 }
9240
9241 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9242 {
9243 if (!ENABLE_ARCH_6K) {
9244 return false;
9245 }
9246 /* We UNDEF for these UNPREDICTABLE cases. */
9247 if (a->rt & 1) {
9248 unallocated_encoding(s);
9249 return true;
9250 }
9251 a->rt2 = a->rt + 1;
9252 return op_strex(s, a, MO_64, false);
9253 }
9254
9255 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9256 {
9257 return op_strex(s, a, MO_64, false);
9258 }
9259
9260 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9261 {
9262 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9263 return false;
9264 }
9265 return op_strex(s, a, MO_8, false);
9266 }
9267
9268 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9269 {
9270 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9271 return false;
9272 }
9273 return op_strex(s, a, MO_16, false);
9274 }
9275
9276 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9277 {
9278 if (!ENABLE_ARCH_8) {
9279 return false;
9280 }
9281 return op_strex(s, a, MO_32, true);
9282 }
9283
9284 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9285 {
9286 if (!ENABLE_ARCH_8) {
9287 return false;
9288 }
9289 /* We UNDEF for these UNPREDICTABLE cases. */
9290 if (a->rt & 1) {
9291 unallocated_encoding(s);
9292 return true;
9293 }
9294 a->rt2 = a->rt + 1;
9295 return op_strex(s, a, MO_64, true);
9296 }
9297
9298 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9299 {
9300 if (!ENABLE_ARCH_8) {
9301 return false;
9302 }
9303 return op_strex(s, a, MO_64, true);
9304 }
9305
9306 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9307 {
9308 if (!ENABLE_ARCH_8) {
9309 return false;
9310 }
9311 return op_strex(s, a, MO_8, true);
9312 }
9313
9314 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9315 {
9316 if (!ENABLE_ARCH_8) {
9317 return false;
9318 }
9319 return op_strex(s, a, MO_16, true);
9320 }
9321
9322 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9323 {
9324 TCGv_i32 addr, tmp;
9325
9326 if (!ENABLE_ARCH_8) {
9327 return false;
9328 }
9329 /* We UNDEF for these UNPREDICTABLE cases. */
9330 if (a->rn == 15 || a->rt == 15) {
9331 unallocated_encoding(s);
9332 return true;
9333 }
9334
9335 addr = load_reg(s, a->rn);
9336 tmp = load_reg(s, a->rt);
9337 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9338 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9339 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9340
9341 tcg_temp_free_i32(tmp);
9342 tcg_temp_free_i32(addr);
9343 return true;
9344 }
9345
9346 static bool trans_STL(DisasContext *s, arg_STL *a)
9347 {
9348 return op_stl(s, a, MO_UL);
9349 }
9350
9351 static bool trans_STLB(DisasContext *s, arg_STL *a)
9352 {
9353 return op_stl(s, a, MO_UB);
9354 }
9355
9356 static bool trans_STLH(DisasContext *s, arg_STL *a)
9357 {
9358 return op_stl(s, a, MO_UW);
9359 }
9360
9361 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9362 {
9363 TCGv_i32 addr;
9364 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9365 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9366
9367 /* We UNDEF for these UNPREDICTABLE cases. */
9368 if (a->rn == 15 || a->rt == 15
9369 || (!v8a && s->thumb && a->rt == 13)
9370 || (mop == MO_64
9371 && (a->rt2 == 15 || a->rt == a->rt2
9372 || (!v8a && s->thumb && a->rt2 == 13)))) {
9373 unallocated_encoding(s);
9374 return true;
9375 }
9376
9377 addr = tcg_temp_local_new_i32();
9378 load_reg_var(s, addr, a->rn);
9379 tcg_gen_addi_i32(addr, addr, a->imm);
9380
9381 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9382 tcg_temp_free_i32(addr);
9383
9384 if (acq) {
9385 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9386 }
9387 return true;
9388 }
9389
9390 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9391 {
9392 if (!ENABLE_ARCH_6) {
9393 return false;
9394 }
9395 return op_ldrex(s, a, MO_32, false);
9396 }
9397
9398 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9399 {
9400 if (!ENABLE_ARCH_6K) {
9401 return false;
9402 }
9403 /* We UNDEF for these UNPREDICTABLE cases. */
9404 if (a->rt & 1) {
9405 unallocated_encoding(s);
9406 return true;
9407 }
9408 a->rt2 = a->rt + 1;
9409 return op_ldrex(s, a, MO_64, false);
9410 }
9411
9412 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9413 {
9414 return op_ldrex(s, a, MO_64, false);
9415 }
9416
9417 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9418 {
9419 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9420 return false;
9421 }
9422 return op_ldrex(s, a, MO_8, false);
9423 }
9424
9425 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9426 {
9427 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9428 return false;
9429 }
9430 return op_ldrex(s, a, MO_16, false);
9431 }
9432
9433 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9434 {
9435 if (!ENABLE_ARCH_8) {
9436 return false;
9437 }
9438 return op_ldrex(s, a, MO_32, true);
9439 }
9440
9441 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9442 {
9443 if (!ENABLE_ARCH_8) {
9444 return false;
9445 }
9446 /* We UNDEF for these UNPREDICTABLE cases. */
9447 if (a->rt & 1) {
9448 unallocated_encoding(s);
9449 return true;
9450 }
9451 a->rt2 = a->rt + 1;
9452 return op_ldrex(s, a, MO_64, true);
9453 }
9454
9455 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9456 {
9457 if (!ENABLE_ARCH_8) {
9458 return false;
9459 }
9460 return op_ldrex(s, a, MO_64, true);
9461 }
9462
9463 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9464 {
9465 if (!ENABLE_ARCH_8) {
9466 return false;
9467 }
9468 return op_ldrex(s, a, MO_8, true);
9469 }
9470
9471 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9472 {
9473 if (!ENABLE_ARCH_8) {
9474 return false;
9475 }
9476 return op_ldrex(s, a, MO_16, true);
9477 }
9478
9479 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9480 {
9481 TCGv_i32 addr, tmp;
9482
9483 if (!ENABLE_ARCH_8) {
9484 return false;
9485 }
9486 /* We UNDEF for these UNPREDICTABLE cases. */
9487 if (a->rn == 15 || a->rt == 15) {
9488 unallocated_encoding(s);
9489 return true;
9490 }
9491
9492 addr = load_reg(s, a->rn);
9493 tmp = tcg_temp_new_i32();
9494 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9495 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9496 tcg_temp_free_i32(addr);
9497
9498 store_reg(s, a->rt, tmp);
9499 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9500 return true;
9501 }
9502
9503 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9504 {
9505 return op_lda(s, a, MO_UL);
9506 }
9507
9508 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9509 {
9510 return op_lda(s, a, MO_UB);
9511 }
9512
9513 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9514 {
9515 return op_lda(s, a, MO_UW);
9516 }
9517
9518 /*
9519 * Media instructions
9520 */
9521
9522 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9523 {
9524 TCGv_i32 t1, t2;
9525
9526 if (!ENABLE_ARCH_6) {
9527 return false;
9528 }
9529
9530 t1 = load_reg(s, a->rn);
9531 t2 = load_reg(s, a->rm);
9532 gen_helper_usad8(t1, t1, t2);
9533 tcg_temp_free_i32(t2);
9534 if (a->ra != 15) {
9535 t2 = load_reg(s, a->ra);
9536 tcg_gen_add_i32(t1, t1, t2);
9537 tcg_temp_free_i32(t2);
9538 }
9539 store_reg(s, a->rd, t1);
9540 return true;
9541 }
9542
9543 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9544 {
9545 TCGv_i32 tmp;
9546 int width = a->widthm1 + 1;
9547 int shift = a->lsb;
9548
9549 if (!ENABLE_ARCH_6T2) {
9550 return false;
9551 }
9552 if (shift + width > 32) {
9553 /* UNPREDICTABLE; we choose to UNDEF */
9554 unallocated_encoding(s);
9555 return true;
9556 }
9557
9558 tmp = load_reg(s, a->rn);
9559 if (u) {
9560 tcg_gen_extract_i32(tmp, tmp, shift, width);
9561 } else {
9562 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9563 }
9564 store_reg(s, a->rd, tmp);
9565 return true;
9566 }
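/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the (s)extract above in plain C: take 'width' bits starting at 'lsb',
 * zero-extended for UBFX or sign-extended for SBFX.
 */
#include <stdint.h>

static uint32_t ubfx_demo(uint32_t rn, int lsb, int width)
{
    uint32_t mask = width == 32 ? 0xffffffffu : (1u << width) - 1;
    return (rn >> lsb) & mask;
}

static int32_t sbfx_demo(uint32_t rn, int lsb, int width)
{
    uint32_t field = ubfx_demo(rn, lsb, width);
    uint32_t sign = 1u << (width - 1);
    /* Standard sign-extension trick: flip and subtract the sign bit. */
    return (int32_t)((field ^ sign) - sign);
}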
9567
9568 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9569 {
9570 return op_bfx(s, a, false);
9571 }
9572
9573 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9574 {
9575 return op_bfx(s, a, true);
9576 }
9577
9578 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9579 {
9580 TCGv_i32 tmp;
9581 int msb = a->msb, lsb = a->lsb;
9582 int width;
9583
9584 if (!ENABLE_ARCH_6T2) {
9585 return false;
9586 }
9587 if (msb < lsb) {
9588 /* UNPREDICTABLE; we choose to UNDEF */
9589 unallocated_encoding(s);
9590 return true;
9591 }
9592
9593 width = msb + 1 - lsb;
9594 if (a->rn == 15) {
9595 /* BFC */
9596 tmp = tcg_const_i32(0);
9597 } else {
9598 /* BFI */
9599 tmp = load_reg(s, a->rn);
9600 }
9601 if (width != 32) {
9602 TCGv_i32 tmp2 = load_reg(s, a->rd);
9603 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9604 tcg_temp_free_i32(tmp2);
9605 }
9606 store_reg(s, a->rd, tmp);
9607 return true;
9608 }
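/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the deposit above in plain C: insert the low 'width' bits of 'src' at
 * bit position 'lsb' of 'rd'.  BFC is the src == 0 case.
 */
#include <stdint.h>

static uint32_t bfi_demo(uint32_t rd, uint32_t src, int lsb, int width)
{
    if (width == 32) {
        return src;                       /* lsb == 0, whole register */
    }
    uint32_t mask = ((1u << width) - 1) << lsb;
    return (rd & ~mask) | ((src << lsb) & mask);
}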
9609
9610 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9611 {
9612 unallocated_encoding(s);
9613 return true;
9614 }
9615
9616 /*
9617 * Parallel addition and subtraction
9618 */
9619
9620 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9621 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9622 {
9623 TCGv_i32 t0, t1;
9624
9625 if (s->thumb
9626 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9627 : !ENABLE_ARCH_6) {
9628 return false;
9629 }
9630
9631 t0 = load_reg(s, a->rn);
9632 t1 = load_reg(s, a->rm);
9633
9634 gen(t0, t0, t1);
9635
9636 tcg_temp_free_i32(t1);
9637 store_reg(s, a->rd, t0);
9638 return true;
9639 }
9640
9641 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9642 void (*gen)(TCGv_i32, TCGv_i32,
9643 TCGv_i32, TCGv_ptr))
9644 {
9645 TCGv_i32 t0, t1;
9646 TCGv_ptr ge;
9647
9648 if (s->thumb
9649 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9650 : !ENABLE_ARCH_6) {
9651 return false;
9652 }
9653
9654 t0 = load_reg(s, a->rn);
9655 t1 = load_reg(s, a->rm);
9656
9657 ge = tcg_temp_new_ptr();
9658 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9659 gen(t0, t0, t1, ge);
9660
9661 tcg_temp_free_ptr(ge);
9662 tcg_temp_free_i32(t1);
9663 store_reg(s, a->rd, t0);
9664 return true;
9665 }
9666
9667 #define DO_PAR_ADDSUB(NAME, helper) \
9668 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9669 { \
9670 return op_par_addsub(s, a, helper); \
9671 }
9672
9673 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9674 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9675 { \
9676 return op_par_addsub_ge(s, a, helper); \
9677 }
9678
9679 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9680 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9681 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9682 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9683 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9684 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9685
9686 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9687 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9688 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9689 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9690 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9691 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9692
9693 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9694 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9695 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9696 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9697 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9698 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9699
9700 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9701 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9702 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9703 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9704 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9705 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9706
9707 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9708 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9709 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9710 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9711 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9712 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9713
9714 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9715 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9716 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9717 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9718 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9719 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9720
9721 #undef DO_PAR_ADDSUB
9722 #undef DO_PAR_ADDSUB_GE
9723
9724 /*
9725 * Packing, unpacking, saturation, and reversal
9726 */
9727
9728 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9729 {
9730 TCGv_i32 tn, tm;
9731 int shift = a->imm;
9732
9733 if (s->thumb
9734 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9735 : !ENABLE_ARCH_6) {
9736 return false;
9737 }
9738
9739 tn = load_reg(s, a->rn);
9740 tm = load_reg(s, a->rm);
9741 if (a->tb) {
9742 /* PKHTB */
9743 if (shift == 0) {
9744 shift = 31;
9745 }
9746 tcg_gen_sari_i32(tm, tm, shift);
9747 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9748 } else {
9749 /* PKHBT */
9750 tcg_gen_shli_i32(tm, tm, shift);
9751 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9752 }
9753 tcg_temp_free_i32(tm);
9754 store_reg(s, a->rd, tn);
9755 return true;
9756 }
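/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the two deposit forms above in plain C.  PKHBT keeps Rn<15:0> and takes
 * the top half from Rm LSL #shift; PKHTB keeps Rn<31:16> and takes the
 * bottom half from Rm ASR #shift (the shift == 0 encoding handled as above).
 * The signed right shift assumes arithmetic shifting, as tcg_gen_sari_i32
 * guarantees.
 */
#include <stdint.h>

static uint32_t pkhbt_demo(uint32_t rn, uint32_t rm, unsigned shift)
{
    uint32_t tm = rm << shift;                           /* LSL */
    return (tm & 0xffff0000u) | (rn & 0x0000ffffu);
}

static uint32_t pkhtb_demo(uint32_t rn, int32_t rm, unsigned shift)
{
    uint32_t tm = (uint32_t)(rm >> (shift ? shift : 31)); /* ASR */
    return (rn & 0xffff0000u) | (tm & 0x0000ffffu);
}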
9757
9758 static bool op_sat(DisasContext *s, arg_sat *a,
9759 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9760 {
9761 TCGv_i32 tmp, satimm;
9762 int shift = a->imm;
9763
9764 if (!ENABLE_ARCH_6) {
9765 return false;
9766 }
9767
9768 tmp = load_reg(s, a->rn);
9769 if (a->sh) {
9770 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9771 } else {
9772 tcg_gen_shli_i32(tmp, tmp, shift);
9773 }
9774
9775 satimm = tcg_const_i32(a->satimm);
9776 gen(tmp, cpu_env, tmp, satimm);
9777 tcg_temp_free_i32(satimm);
9778
9779 store_reg(s, a->rd, tmp);
9780 return true;
9781 }
9782
9783 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9784 {
9785 return op_sat(s, a, gen_helper_ssat);
9786 }
9787
9788 static bool trans_USAT(DisasContext *s, arg_sat *a)
9789 {
9790 return op_sat(s, a, gen_helper_usat);
9791 }
9792
9793 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9794 {
9795 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9796 return false;
9797 }
9798 return op_sat(s, a, gen_helper_ssat16);
9799 }
9800
9801 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9802 {
9803 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9804 return false;
9805 }
9806 return op_sat(s, a, gen_helper_usat16);
9807 }
9808
9809 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9810 void (*gen_extract)(TCGv_i32, TCGv_i32),
9811 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9812 {
9813 TCGv_i32 tmp;
9814
9815 if (!ENABLE_ARCH_6) {
9816 return false;
9817 }
9818
9819 tmp = load_reg(s, a->rm);
9820 /*
9821 * TODO: In many cases we could do a shift instead of a rotate.
9822 * Combined with a simple extend, that becomes an extract.
9823 */
9824 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9825 gen_extract(tmp, tmp);
9826
9827 if (a->rn != 15) {
9828 TCGv_i32 tmp2 = load_reg(s, a->rn);
9829 gen_add(tmp, tmp, tmp2);
9830 tcg_temp_free_i32(tmp2);
9831 }
9832 store_reg(s, a->rd, tmp);
9833 return true;
9834 }
9835
9836 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9837 {
9838 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9839 }
9840
9841 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9842 {
9843 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9844 }
9845
9846 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9847 {
9848 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9849 return false;
9850 }
9851 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9852 }
9853
9854 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9855 {
9856 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9857 }
9858
9859 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9860 {
9861 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9862 }
9863
9864 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9865 {
9866 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9867 return false;
9868 }
9869 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9870 }
9871
9872 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9873 {
9874 TCGv_i32 t1, t2, t3;
9875
9876 if (s->thumb
9877 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9878 : !ENABLE_ARCH_6) {
9879 return false;
9880 }
9881
9882 t1 = load_reg(s, a->rn);
9883 t2 = load_reg(s, a->rm);
9884 t3 = tcg_temp_new_i32();
9885 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9886 gen_helper_sel_flags(t1, t3, t1, t2);
9887 tcg_temp_free_i32(t3);
9888 tcg_temp_free_i32(t2);
9889 store_reg(s, a->rd, t1);
9890 return true;
9891 }
9892
9893 static bool op_rr(DisasContext *s, arg_rr *a,
9894 void (*gen)(TCGv_i32, TCGv_i32))
9895 {
9896 TCGv_i32 tmp;
9897
9898 tmp = load_reg(s, a->rm);
9899 gen(tmp, tmp);
9900 store_reg(s, a->rd, tmp);
9901 return true;
9902 }
9903
9904 static bool trans_REV(DisasContext *s, arg_rr *a)
9905 {
9906 if (!ENABLE_ARCH_6) {
9907 return false;
9908 }
9909 return op_rr(s, a, tcg_gen_bswap32_i32);
9910 }
9911
9912 static bool trans_REV16(DisasContext *s, arg_rr *a)
9913 {
9914 if (!ENABLE_ARCH_6) {
9915 return false;
9916 }
9917 return op_rr(s, a, gen_rev16);
9918 }
9919
9920 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9921 {
9922 if (!ENABLE_ARCH_6) {
9923 return false;
9924 }
9925 return op_rr(s, a, gen_revsh);
9926 }
9927
9928 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9929 {
9930 if (!ENABLE_ARCH_6T2) {
9931 return false;
9932 }
9933 return op_rr(s, a, gen_helper_rbit);
9934 }
9935
9936 /*
9937 * Signed multiply, signed and unsigned divide
9938 */
9939
9940 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9941 {
9942 TCGv_i32 t1, t2;
9943
9944 if (!ENABLE_ARCH_6) {
9945 return false;
9946 }
9947
9948 t1 = load_reg(s, a->rn);
9949 t2 = load_reg(s, a->rm);
9950 if (m_swap) {
9951 gen_swap_half(t2);
9952 }
9953 gen_smul_dual(t1, t2);
9954
9955 if (sub) {
9956 /* This subtraction cannot overflow. */
9957 tcg_gen_sub_i32(t1, t1, t2);
9958 } else {
9959 /*
9960 * This addition cannot overflow 32 bits; however it may
9961 * overflow when considered as a signed operation, in which case
9962 * we must set the Q flag.
9963 */
9964 gen_helper_add_setq(t1, cpu_env, t1, t2);
9965 }
9966 tcg_temp_free_i32(t2);
9967
9968 if (a->ra != 15) {
9969 t2 = load_reg(s, a->ra);
9970 gen_helper_add_setq(t1, cpu_env, t1, t2);
9971 tcg_temp_free_i32(t2);
9972 }
9973 store_reg(s, a->rd, t1);
9974 return true;
9975 }
9976
9977 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9978 {
9979 return op_smlad(s, a, false, false);
9980 }
9981
9982 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9983 {
9984 return op_smlad(s, a, true, false);
9985 }
9986
9987 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9988 {
9989 return op_smlad(s, a, false, true);
9990 }
9991
9992 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9993 {
9994 return op_smlad(s, a, true, true);
9995 }
9996
9997 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9998 {
9999 TCGv_i32 t1, t2;
10000 TCGv_i64 l1, l2;
10001
10002 if (!ENABLE_ARCH_6) {
10003 return false;
10004 }
10005
10006 t1 = load_reg(s, a->rn);
10007 t2 = load_reg(s, a->rm);
10008 if (m_swap) {
10009 gen_swap_half(t2);
10010 }
10011 gen_smul_dual(t1, t2);
10012
10013 l1 = tcg_temp_new_i64();
10014 l2 = tcg_temp_new_i64();
10015 tcg_gen_ext_i32_i64(l1, t1);
10016 tcg_gen_ext_i32_i64(l2, t2);
10017 tcg_temp_free_i32(t1);
10018 tcg_temp_free_i32(t2);
10019
10020 if (sub) {
10021 tcg_gen_sub_i64(l1, l1, l2);
10022 } else {
10023 tcg_gen_add_i64(l1, l1, l2);
10024 }
10025 tcg_temp_free_i64(l2);
10026
10027 gen_addq(s, l1, a->ra, a->rd);
10028 gen_storeq_reg(s, a->ra, a->rd, l1);
10029 tcg_temp_free_i64(l1);
10030 return true;
10031 }
10032
10033 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
10034 {
10035 return op_smlald(s, a, false, false);
10036 }
10037
10038 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
10039 {
10040 return op_smlald(s, a, true, false);
10041 }
10042
10043 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
10044 {
10045 return op_smlald(s, a, false, true);
10046 }
10047
10048 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
10049 {
10050 return op_smlald(s, a, true, true);
10051 }
10052
10053 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
10054 {
10055 TCGv_i32 t1, t2;
10056
10057 if (s->thumb
10058 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
10059 : !ENABLE_ARCH_6) {
10060 return false;
10061 }
10062
10063 t1 = load_reg(s, a->rn);
10064 t2 = load_reg(s, a->rm);
10065 tcg_gen_muls2_i32(t2, t1, t1, t2);
10066
10067 if (a->ra != 15) {
10068 TCGv_i32 t3 = load_reg(s, a->ra);
10069 if (sub) {
10070 /*
10071 * For SMMLS we need a full 64-bit subtract: it propagates the
10072 * borrow caused by a non-zero product lowpart into the high word,
10073 * and leaves the correct result lowpart for the rounding step below.
10074 */
10075 TCGv_i32 zero = tcg_const_i32(0);
10076 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
10077 tcg_temp_free_i32(zero);
10078 } else {
10079 tcg_gen_add_i32(t1, t1, t3);
10080 }
10081 tcg_temp_free_i32(t3);
10082 }
10083 if (round) {
10084 /*
10085 * Adding 0x80000000 to the 64-bit quantity means that we get
10086 * a carry into the high word exactly when the low word has its msb set.
10087 */
10088 tcg_gen_shri_i32(t2, t2, 31);
10089 tcg_gen_add_i32(t1, t1, t2);
10090 }
10091 tcg_temp_free_i32(t2);
10092 store_reg(s, a->rd, t1);
10093 return true;
10094 }
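/*
 * Editor's note: an illustrative, standalone sketch (not part of this file)
 * of the rounding step above: for any 64-bit x,
 *   hi32(x + 0x80000000) == hi32(x) + bit31(lo32(x))   (mod 2^32)
 * which is exactly what the shri/add pair computes.
 */
#include <assert.h>
#include <stdint.h>

static void smmlar_round_demo(uint64_t x)
{
    uint32_t hi = (uint32_t)(x >> 32);
    uint32_t lo = (uint32_t)x;
    uint32_t rounded_hi = (uint32_t)((x + 0x80000000u) >> 32);
    assert(rounded_hi == (uint32_t)(hi + (lo >> 31)));
}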
10095
10096 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
10097 {
10098 return op_smmla(s, a, false, false);
10099 }
10100
10101 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
10102 {
10103 return op_smmla(s, a, true, false);
10104 }
10105
10106 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
10107 {
10108 return op_smmla(s, a, false, true);
10109 }
10110
10111 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
10112 {
10113 return op_smmla(s, a, true, true);
10114 }
10115
10116 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
10117 {
10118 TCGv_i32 t1, t2;
10119
10120 if (s->thumb
10121 ? !dc_isar_feature(aa32_thumb_div, s)
10122 : !dc_isar_feature(aa32_arm_div, s)) {
10123 return false;
10124 }
10125
10126 t1 = load_reg(s, a->rn);
10127 t2 = load_reg(s, a->rm);
10128 if (u) {
10129 gen_helper_udiv(t1, t1, t2);
10130 } else {
10131 gen_helper_sdiv(t1, t1, t2);
10132 }
10133 tcg_temp_free_i32(t2);
10134 store_reg(s, a->rd, t1);
10135 return true;
10136 }
10137
10138 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10139 {
10140 return op_div(s, a, false);
10141 }
10142
10143 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10144 {
10145 return op_div(s, a, true);
10146 }
10147
10148 /*
10149 * Block data transfer
10150 */
10151
10152 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10153 {
10154 TCGv_i32 addr = load_reg(s, a->rn);
10155
10156 if (a->b) {
10157 if (a->i) {
10158 /* pre increment */
10159 tcg_gen_addi_i32(addr, addr, 4);
10160 } else {
10161 /* pre decrement */
10162 tcg_gen_addi_i32(addr, addr, -(n * 4));
10163 }
10164 } else if (!a->i && n != 1) {
10165 /* post decrement */
10166 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10167 }
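/*
 * Worked example (illustration): for a "decrement before" block
 * transfer of n == 3 registers, 'addr' becomes Rn - 12 and the
 * transfers then hit Rn - 12, Rn - 8 and Rn - 4; for "increment
 * after", 'addr' stays at Rn and the transfers hit Rn, Rn + 4 and
 * Rn + 8.
 */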
10168
10169 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10170 /*
10171 * If the writeback is incrementing SP rather than
10172 * decrementing it, and the initial SP is below the
10173 * stack limit but the final written-back SP would
10174 * be above, then we must not perform any memory
10175 * accesses, but it is IMPDEF whether we generate
10176 * an exception. We choose to do so in this case.
10177 * At this point 'addr' is the lowest address: either the
10178 * original SP (if incrementing) or our final SP (if
10179 * decrementing), so that's what we check.
10180 */
10181 gen_helper_v8m_stackcheck(cpu_env, addr);
10182 }
10183
10184 return addr;
10185 }
10186
10187 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10188 TCGv_i32 addr, int n)
10189 {
10190 if (a->w) {
10191 /* write back */
10192 if (!a->b) {
10193 if (a->i) {
10194 /* post increment */
10195 tcg_gen_addi_i32(addr, addr, 4);
10196 } else {
10197 /* post decrement */
10198 tcg_gen_addi_i32(addr, addr, -(n * 4));
10199 }
10200 } else if (!a->i && n != 1) {
10201 /* pre decrement */
10202 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10203 }
10204 store_reg(s, a->rn, addr);
10205 } else {
10206 tcg_temp_free_i32(addr);
10207 }
10208 }
10209
10210 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10211 {
10212 int i, j, n, list, mem_idx;
10213 bool user = a->u;
10214 TCGv_i32 addr, tmp, tmp2;
10215
10216 if (user) {
10217 /* STM (user) */
10218 if (IS_USER(s)) {
10219 /* Only usable in supervisor mode. */
10220 unallocated_encoding(s);
10221 return true;
10222 }
10223 }
10224
10225 list = a->list;
10226 n = ctpop16(list);
10227 if (n < min_n || a->rn == 15) {
10228 unallocated_encoding(s);
10229 return true;
10230 }
10231
10232 addr = op_addr_block_pre(s, a, n);
10233 mem_idx = get_mem_index(s);
10234
10235 for (i = j = 0; i < 16; i++) {
10236 if (!(list & (1 << i))) {
10237 continue;
10238 }
10239
10240 if (user && i != 15) {
10241 tmp = tcg_temp_new_i32();
10242 tmp2 = tcg_const_i32(i);
10243 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10244 tcg_temp_free_i32(tmp2);
10245 } else {
10246 tmp = load_reg(s, i);
10247 }
10248 gen_aa32_st32(s, tmp, addr, mem_idx);
10249 tcg_temp_free_i32(tmp);
10250
10251 /* No need to add after the last transfer. */
10252 if (++j != n) {
10253 tcg_gen_addi_i32(addr, addr, 4);
10254 }
10255 }
10256
10257 op_addr_block_post(s, a, addr, n);
10258 return true;
10259 }
10260
10261 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10262 {
10263 /* BitCount(list) < 1 is UNPREDICTABLE */
10264 return op_stm(s, a, 1);
10265 }
10266
10267 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10268 {
10269 /* Writeback register in register list is UNPREDICTABLE for T32. */
10270 if (a->w && (a->list & (1 << a->rn))) {
10271 unallocated_encoding(s);
10272 return true;
10273 }
10274 /* BitCount(list) < 2 is UNPREDICTABLE */
10275 return op_stm(s, a, 2);
10276 }
10277
10278 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10279 {
10280 int i, j, n, list, mem_idx;
10281 bool loaded_base;
10282 bool user = a->u;
10283 bool exc_return = false;
10284 TCGv_i32 addr, tmp, tmp2, loaded_var;
10285
10286 if (user) {
10287 /* LDM (user), LDM (exception return) */
10288 if (IS_USER(s)) {
10289 /* Only usable in supervisor mode. */
10290 unallocated_encoding(s);
10291 return true;
10292 }
10293 if (extract32(a->list, 15, 1)) {
10294 exc_return = true;
10295 user = false;
10296 } else {
10297 /* LDM (user) does not allow writeback. */
10298 if (a->w) {
10299 unallocated_encoding(s);
10300 return true;
10301 }
10302 }
10303 }
10304
10305 list = a->list;
10306 n = ctpop16(list);
10307 if (n < min_n || a->rn == 15) {
10308 unallocated_encoding(s);
10309 return true;
10310 }
10311
10312 addr = op_addr_block_pre(s, a, n);
10313 mem_idx = get_mem_index(s);
10314 loaded_base = false;
10315 loaded_var = NULL;
10316
10317 for (i = j = 0; i < 16; i++) {
10318 if (!(list & (1 << i))) {
10319 continue;
10320 }
10321
10322 tmp = tcg_temp_new_i32();
10323 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10324 if (user) {
10325 tmp2 = tcg_const_i32(i);
10326 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10327 tcg_temp_free_i32(tmp2);
10328 tcg_temp_free_i32(tmp);
10329 } else if (i == a->rn) {
10330 loaded_var = tmp;
10331 loaded_base = true;
10332 } else if (i == 15 && exc_return) {
10333 store_pc_exc_ret(s, tmp);
10334 } else {
10335 store_reg_from_load(s, i, tmp);
10336 }
10337
10338 /* No need to add after the last transfer. */
10339 if (++j != n) {
10340 tcg_gen_addi_i32(addr, addr, 4);
10341 }
10342 }
10343
10344 op_addr_block_post(s, a, addr, n);
10345
10346 if (loaded_base) {
10347 /* Note that we reject base == pc above. */
10348 store_reg(s, a->rn, loaded_var);
10349 }
10350
10351 if (exc_return) {
10352 /* Restore CPSR from SPSR. */
10353 tmp = load_cpu_field(spsr);
10354 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10355 gen_io_start();
10356 }
10357 gen_helper_cpsr_write_eret(cpu_env, tmp);
10358 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10359 gen_io_end();
10360 }
10361 tcg_temp_free_i32(tmp);
10362 /* Must exit loop to check un-masked IRQs */
10363 s->base.is_jmp = DISAS_EXIT;
10364 }
10365 return true;
10366 }
10367
10368 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10369 {
10370 /*
10371 * Writeback register in register list is UNPREDICTABLE
10372 * for ArchVersion() >= 7. Prior to v7, A32 would write
10373 * an UNKNOWN value to the base register.
10374 */
10375 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10376 unallocated_encoding(s);
10377 return true;
10378 }
10379 /* BitCount(list) < 1 is UNPREDICTABLE */
10380 return do_ldm(s, a, 1);
10381 }
10382
10383 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10384 {
10385 /* Writeback register in register list is UNPREDICTABLE for T32. */
10386 if (a->w && (a->list & (1 << a->rn))) {
10387 unallocated_encoding(s);
10388 return true;
10389 }
10390 /* BitCount(list) < 2 is UNPREDICTABLE */
10391 return do_ldm(s, a, 2);
10392 }
10393
10394 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10395 {
10396 /* Writeback is conditional on the base register not being loaded. */
10397 a->w = !(a->list & (1 << a->rn));
10398 /* BitCount(list) < 1 is UNPREDICTABLE */
10399 return do_ldm(s, a, 1);
10400 }
10401
10402 /*
10403 * Branch, branch with link
10404 */
10405
10406 static bool trans_B(DisasContext *s, arg_i *a)
10407 {
10408 gen_jmp(s, read_pc(s) + a->imm);
10409 return true;
10410 }
10411
10412 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10413 {
10414 /* This insn has its cond from the encoding; it must be outside an IT block. */
10415 if (a->cond >= 0xe) {
10416 return false;
10417 }
10418 if (s->condexec_mask) {
10419 unallocated_encoding(s);
10420 return true;
10421 }
10422 arm_skip_unless(s, a->cond);
10423 gen_jmp(s, read_pc(s) + a->imm);
10424 return true;
10425 }
10426
10427 static bool trans_BL(DisasContext *s, arg_i *a)
10428 {
10429 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10430 gen_jmp(s, read_pc(s) + a->imm);
10431 return true;
10432 }
10433
10434 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10435 {
10436 TCGv_i32 tmp;
10437
10438 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10439 if (s->thumb && (a->imm & 2)) {
10440 return false;
10441 }
10442 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10443 tmp = tcg_const_i32(!s->thumb);
10444 store_cpu_field(tmp, thumb);
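/*
 * BLX (immediate) always switches instruction set, so the branch
 * base is the PC aligned down to a word boundary (a no-op when
 * coming from A32, required when coming from T32).
 */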
10445 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10446 return true;
10447 }
10448
10449 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10450 {
10451 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10452 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10453 return true;
10454 }
10455
10456 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10457 {
10458 TCGv_i32 tmp = tcg_temp_new_i32();
10459
10460 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10461 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10462 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10463 gen_bx(s, tmp);
10464 return true;
10465 }
10466
10467 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10468 {
10469 TCGv_i32 tmp;
10470
10471 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10472 if (!ENABLE_ARCH_5) {
10473 return false;
10474 }
10475 tmp = tcg_temp_new_i32();
10476 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10477 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10478 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10479 gen_bx(s, tmp);
10480 return true;
10481 }
10482
10483 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10484 {
10485 TCGv_i32 addr, tmp;
10486
10487 tmp = load_reg(s, a->rm);
10488 if (half) {
10489 tcg_gen_add_i32(tmp, tmp, tmp);
10490 }
10491 addr = load_reg(s, a->rn);
10492 tcg_gen_add_i32(addr, addr, tmp);
10493
10494 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10495 half ? MO_UW | s->be_data : MO_UB);
10496 tcg_temp_free_i32(addr);
10497
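/*
 * The loaded table entry is a halfword offset: double it and add
 * the Thumb-mode PC (this insn's address + 4) to form the branch
 * target.
 */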
10498 tcg_gen_add_i32(tmp, tmp, tmp);
10499 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10500 store_reg(s, 15, tmp);
10501 return true;
10502 }
10503
10504 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10505 {
10506 return op_tbranch(s, a, false);
10507 }
10508
10509 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10510 {
10511 return op_tbranch(s, a, true);
10512 }
10513
10514 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10515 {
10516 TCGv_i32 tmp = load_reg(s, a->rn);
10517
10518 arm_gen_condlabel(s);
10519 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10520 tmp, 0, s->condlabel);
10521 tcg_temp_free_i32(tmp);
10522 gen_jmp(s, read_pc(s) + a->imm);
10523 return true;
10524 }
10525
10526 /*
10527 * Supervisor call - both T32 & A32 come here so we need to check
10528 * which mode we are in when checking for semihosting.
10529 */
10530
10531 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10532 {
10533 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
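/*
 * 0xab and 0x123456 are the standard AArch32 semihosting call
 * immediates for Thumb and Arm state respectively.
 */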
10534
10535 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10536 #ifndef CONFIG_USER_ONLY
10537 !IS_USER(s) &&
10538 #endif
10539 (a->imm == semihost_imm)) {
10540 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10541 } else {
10542 gen_set_pc_im(s, s->base.pc_next);
10543 s->svc_imm = a->imm;
10544 s->base.is_jmp = DISAS_SWI;
10545 }
10546 return true;
10547 }
10548
10549 /*
10550 * Unconditional system instructions
10551 */
10552
10553 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10554 {
10555 static const int8_t pre_offset[4] = {
10556 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10557 };
10558 static const int8_t post_offset[4] = {
10559 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10560 };
10561 TCGv_i32 addr, t1, t2;
10562
10563 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10564 return false;
10565 }
10566 if (IS_USER(s)) {
10567 unallocated_encoding(s);
10568 return true;
10569 }
10570
10571 addr = load_reg(s, a->rn);
10572 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10573
10574 /* Load PC into t1 and CPSR into t2. */
10575 t1 = tcg_temp_new_i32();
10576 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10577 tcg_gen_addi_i32(addr, addr, 4);
10578 t2 = tcg_temp_new_i32();
10579 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10580
10581 if (a->w) {
10582 /* Base writeback. */
10583 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10584 store_reg(s, a->rn, addr);
10585 } else {
10586 tcg_temp_free_i32(addr);
10587 }
10588 gen_rfe(s, t1, t2);
10589 return true;
10590 }
10591
10592 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10593 {
10594 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10595 return false;
10596 }
10597 gen_srs(s, a->mode, a->pu, a->w);
10598 return true;
10599 }
10600
10601 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10602 {
10603 uint32_t mask, val;
10604
10605 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10606 return false;
10607 }
10608 if (IS_USER(s)) {
10609 /* Implemented as NOP in user mode. */
10610 return true;
10611 }
10612 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10613
10614 mask = val = 0;
10615 if (a->imod & 2) {
10616 if (a->A) {
10617 mask |= CPSR_A;
10618 }
10619 if (a->I) {
10620 mask |= CPSR_I;
10621 }
10622 if (a->F) {
10623 mask |= CPSR_F;
10624 }
10625 if (a->imod & 1) {
10626 val |= mask;
10627 }
10628 }
10629 if (a->M) {
10630 mask |= CPSR_M;
10631 val |= a->mode;
10632 }
10633 if (mask) {
10634 gen_set_psr_im(s, mask, 0, val);
10635 }
10636 return true;
10637 }
10638
10639 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10640 {
10641 TCGv_i32 tmp, addr, el;
10642
10643 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10644 return false;
10645 }
10646 if (IS_USER(s)) {
10647 /* Implemented as NOP in user mode. */
10648 return true;
10649 }
10650
10651 tmp = tcg_const_i32(a->im);
10652 /* FAULTMASK */
10653 if (a->F) {
10654 addr = tcg_const_i32(19);
10655 gen_helper_v7m_msr(cpu_env, addr, tmp);
10656 tcg_temp_free_i32(addr);
10657 }
10658 /* PRIMASK */
10659 if (a->I) {
10660 addr = tcg_const_i32(16);
10661 gen_helper_v7m_msr(cpu_env, addr, tmp);
10662 tcg_temp_free_i32(addr);
10663 }
10664 el = tcg_const_i32(s->current_el);
10665 gen_helper_rebuild_hflags_m32(cpu_env, el);
10666 tcg_temp_free_i32(el);
10667 tcg_temp_free_i32(tmp);
10668 gen_lookup_tb(s);
10669 return true;
10670 }
10671
10672 /*
10673 * Clear-Exclusive, Barriers
10674 */
10675
10676 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10677 {
10678 if (s->thumb
10679 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10680 : !ENABLE_ARCH_6K) {
10681 return false;
10682 }
10683 gen_clrex(s);
10684 return true;
10685 }
10686
10687 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10688 {
10689 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10690 return false;
10691 }
10692 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10693 return true;
10694 }
10695
10696 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10697 {
10698 return trans_DSB(s, NULL);
10699 }
10700
10701 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10702 {
10703 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10704 return false;
10705 }
10706 /*
10707 * We need to break the TB after this insn to execute
10708 * self-modifying code correctly and also to take
10709 * any pending interrupts immediately.
10710 */
10711 gen_goto_tb(s, 0, s->base.pc_next);
10712 return true;
10713 }
10714
10715 static bool trans_SB(DisasContext *s, arg_SB *a)
10716 {
10717 if (!dc_isar_feature(aa32_sb, s)) {
10718 return false;
10719 }
10720 /*
10721 * TODO: There is no speculation barrier opcode
10722 * for TCG; MB and end the TB instead.
10723 */
10724 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10725 gen_goto_tb(s, 0, s->base.pc_next);
10726 return true;
10727 }
10728
10729 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10730 {
10731 if (!ENABLE_ARCH_6) {
10732 return false;
10733 }
10734 if (a->E != (s->be_data == MO_BE)) {
10735 gen_helper_setend(cpu_env);
10736 s->base.is_jmp = DISAS_UPDATE;
10737 }
10738 return true;
10739 }
10740
10741 /*
10742 * Preload instructions
10743 * All are nops, contingent on the appropriate arch level.
10744 */
10745
10746 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10747 {
10748 return ENABLE_ARCH_5TE;
10749 }
10750
10751 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10752 {
10753 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10754 }
10755
10756 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10757 {
10758 return ENABLE_ARCH_7;
10759 }
10760
10761 /*
10762 * If-then
10763 */
10764
10765 static bool trans_IT(DisasContext *s, arg_IT *a)
10766 {
10767 int cond_mask = a->cond_mask;
10768
10769 /*
10770 * No actual code generated for this insn, just setup state.
10771 *
10772 * Combinations of firstcond and mask which set up an 0b1111
10773 * condition are UNPREDICTABLE; we take the CONSTRAINED
10774 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10775 * i.e. both meaning "execute always".
10776 */
10777 s->condexec_cond = (cond_mask >> 4) & 0xe;
10778 s->condexec_mask = cond_mask & 0x1f;
10779 return true;
10780 }
10781
10782 /*
10783 * Legacy decoder.
10784 */
10785
10786 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10787 {
10788 unsigned int cond = insn >> 28;
10789
10790 /* M variants do not implement ARM mode; this must raise the INVSTATE
10791 * UsageFault exception.
10792 */
10793 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10794 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10795 default_exception_el(s));
10796 return;
10797 }
10798
10799 if (cond == 0xf) {
10800 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10801 * choose to UNDEF. In ARMv5 and above the space is used
10802 * for miscellaneous unconditional instructions.
10803 */
10804 ARCH(5);
10805
10806 /* Unconditional instructions. */
10807 /* TODO: Perhaps merge these into one decodetree output file. */
10808 if (disas_a32_uncond(s, insn) ||
10809 disas_vfp_uncond(s, insn) ||
10810 disas_neon_dp(s, insn) ||
10811 disas_neon_ls(s, insn) ||
10812 disas_neon_shared(s, insn)) {
10813 return;
10814 }
10815 /* fall back to legacy decoder */
10816
10817 if (((insn >> 25) & 7) == 1) {
10818 /* NEON Data processing. */
10819 if (disas_neon_data_insn(s, insn)) {
10820 goto illegal_op;
10821 }
10822 return;
10823 }
10824 if ((insn & 0x0e000f00) == 0x0c000100) {
10825 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10826 /* iWMMXt register transfer. */
10827 if (extract32(s->c15_cpar, 1, 1)) {
10828 if (!disas_iwmmxt_insn(s, insn)) {
10829 return;
10830 }
10831 }
10832 }
10833 }
10834 goto illegal_op;
10835 }
10836 if (cond != 0xe) {
10837 /* If not always-execute, generate a conditional jump to the
10838 next instruction. */
10839 arm_skip_unless(s, cond);
10840 }
10841
10842 /* TODO: Perhaps merge these into one decodetree output file. */
10843 if (disas_a32(s, insn) ||
10844 disas_vfp(s, insn)) {
10845 return;
10846 }
10847 /* fall back to legacy decoder */
10848
10849 switch ((insn >> 24) & 0xf) {
10850 case 0xc:
10851 case 0xd:
10852 case 0xe:
10853 if (((insn >> 8) & 0xe) == 10) {
10854 /* VFP, but failed disas_vfp. */
10855 goto illegal_op;
10856 }
10857 if (disas_coproc_insn(s, insn)) {
10858 /* Coprocessor. */
10859 goto illegal_op;
10860 }
10861 break;
10862 default:
10863 illegal_op:
10864 unallocated_encoding(s);
10865 break;
10866 }
10867 }
10868
10869 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10870 {
10871 /*
10872 * Return true if this is a 16-bit instruction. We must be precise
10873 * about this (matching the decode).
10874 */
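/*
 * For example: 0xb500 (push {lr}) has insn >> 11 == 0x16 and is a
 * complete 16-bit insn, while 0xe92d (the first half of a 32-bit
 * PUSH.W/STMDB) has insn >> 11 == 0x1d and falls through to the
 * checks below.
 */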
10875 if ((insn >> 11) < 0x1d) {
10876 /* Definitely a 16-bit instruction */
10877 return true;
10878 }
10879
10880 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10881 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10882 * end up actually treating this as two 16-bit insns, though,
10883 * if it's half of a bl/blx pair that might span a page boundary.
10884 */
10885 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10886 arm_dc_feature(s, ARM_FEATURE_M)) {
10887 /* Thumb2 cores (including all M profile ones) always treat
10888 * 32-bit insns as 32-bit.
10889 */
10890 return false;
10891 }
10892
10893 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10894 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10895 * is not on the next page; we merge this into a 32-bit
10896 * insn.
10897 */
10898 return false;
10899 }
10900 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10901 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10902 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10903 * -- handle as single 16 bit insn
10904 */
10905 return true;
10906 }
10907
10908 /* Translate a 32-bit thumb instruction. */
10909 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10910 {
10911 /*
10912 * ARMv6-M supports a limited subset of Thumb2 instructions.
10913 * Other Thumb-1 architectures allow only the 32-bit
10914 * combined BL/BLX prefix and suffix.
10915 */
10916 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10917 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10918 int i;
10919 bool found = false;
10920 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10921 0xf3b08040 /* dsb */,
10922 0xf3b08050 /* dmb */,
10923 0xf3b08060 /* isb */,
10924 0xf3e08000 /* mrs */,
10925 0xf000d000 /* bl */};
10926 static const uint32_t armv6m_mask[] = {0xffe0d000,
10927 0xfff0d0f0,
10928 0xfff0d0f0,
10929 0xfff0d0f0,
10930 0xffe0d000,
10931 0xf800d000};
10932
10933 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10934 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10935 found = true;
10936 break;
10937 }
10938 }
10939 if (!found) {
10940 goto illegal_op;
10941 }
10942 } else if ((insn & 0xf800e800) != 0xf000e800) {
10943 ARCH(6T2);
10944 }
10945
10946 if ((insn & 0xef000000) == 0xef000000) {
10947 /*
10948 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10949 * transform into
10950 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10951 */
10952 uint32_t a32_insn = (insn & 0xe2ffffff) |
10953 ((insn & (1 << 28)) >> 4) | (1 << 28);
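/*
 * Concretely (illustration): a T32 insn 0xefxxxxxx maps to the A32
 * encoding 0xf2xxxxxx, and 0xffxxxxxx (bit 28 set) maps to
 * 0xf3xxxxxx, i.e. the A32 "1111 001p" Neon data-processing space.
 */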
10954
10955 if (disas_neon_dp(s, a32_insn)) {
10956 return;
10957 }
10958 }
10959
10960 if ((insn & 0xff100000) == 0xf9000000) {
10961 /*
10962 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10963 * transform into
10964 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10965 */
10966 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
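/* e.g. a T32 insn 0xf9xxxxxx becomes the A32 encoding 0xf4xxxxxx. */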
10967
10968 if (disas_neon_ls(s, a32_insn)) {
10969 return;
10970 }
10971 }
10972
10973 /*
10974 * TODO: Perhaps merge these into one decodetree output file.
10975 * Note disas_vfp is written for a32 with cond field in the
10976 * top nibble. The t32 encoding requires 0xe in the top nibble.
10977 */
10978 if (disas_t32(s, insn) ||
10979 disas_vfp_uncond(s, insn) ||
10980 disas_neon_shared(s, insn) ||
10981 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10982 return;
10983 }
10984 /* fall back to legacy decoder */
10985
10986 switch ((insn >> 25) & 0xf) {
10987 case 0: case 1: case 2: case 3:
10988 /* 16-bit instructions. Should never happen. */
10989 abort();
10990 case 6: case 7: case 14: case 15:
10991 /* Coprocessor. */
10992 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10993 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10994 if (extract32(insn, 24, 2) == 3) {
10995 goto illegal_op; /* op0 = 0b11 : unallocated */
10996 }
10997
10998 if (((insn >> 8) & 0xe) == 10 &&
10999 dc_isar_feature(aa32_fpsp_v2, s)) {
11000 /* FP, and the CPU supports it */
11001 goto illegal_op;
11002 } else {
11003 /* All other insns: NOCP */
11004 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
11005 syn_uncategorized(),
11006 default_exception_el(s));
11007 }
11008 break;
11009 }
11010 if (((insn >> 24) & 3) == 3) {
11011 /* Translate into the equivalent ARM encoding. */
11012 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
11013 if (disas_neon_data_insn(s, insn)) {
11014 goto illegal_op;
11015 }
11016 } else if (((insn >> 8) & 0xe) == 10) {
11017 /* VFP, but failed disas_vfp. */
11018 goto illegal_op;
11019 } else {
11020 if (insn & (1 << 28))
11021 goto illegal_op;
11022 if (disas_coproc_insn(s, insn)) {
11023 goto illegal_op;
11024 }
11025 }
11026 break;
11027 case 12:
11028 goto illegal_op;
11029 default:
11030 illegal_op:
11031 unallocated_encoding(s);
11032 }
11033 }
11034
11035 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
11036 {
11037 if (!disas_t16(s, insn)) {
11038 unallocated_encoding(s);
11039 }
11040 }
11041
11042 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
11043 {
11044 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
11045 * (False positives are OK, false negatives are not.)
11046 * We know this is a Thumb insn, and our caller ensures we are
11047 * only called if dc->base.pc_next is less than 4 bytes from the page
11048 * boundary, so we cross the page if the first 16 bits indicate
11049 * that this is a 32 bit insn.
11050 */
11051 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
11052
11053 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
11054 }
11055
11056 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
11057 {
11058 DisasContext *dc = container_of(dcbase, DisasContext, base);
11059 CPUARMState *env = cs->env_ptr;
11060 ARMCPU *cpu = env_archcpu(env);
11061 uint32_t tb_flags = dc->base.tb->flags;
11062 uint32_t condexec, core_mmu_idx;
11063
11064 dc->isar = &cpu->isar;
11065 dc->condjmp = 0;
11066
11067 dc->aarch64 = 0;
11068 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11069 * there is no secure EL1, so we route exceptions to EL3.
11070 */
11071 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11072 !arm_el_is_aa64(env, 3);
11073 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
11074 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11075 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
11076 dc->condexec_mask = (condexec & 0xf) << 1;
11077 dc->condexec_cond = condexec >> 4;
11078
11079 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
11080 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
11081 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11082 #if !defined(CONFIG_USER_ONLY)
11083 dc->user = (dc->current_el == 0);
11084 #endif
11085 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
11086
11087 if (arm_feature(env, ARM_FEATURE_M)) {
11088 dc->vfp_enabled = 1;
11089 dc->be_data = MO_TE;
11090 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
11091 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
11092 regime_is_secure(env, dc->mmu_idx);
11093 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
11094 dc->v8m_fpccr_s_wrong =
11095 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
11096 dc->v7m_new_fp_ctxt_needed =
11097 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
11098 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
11099 } else {
11100 dc->be_data =
11101 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11102 dc->debug_target_el =
11103 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
11104 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
11105 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
11106 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
11107 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
11108 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
11109 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
11110 } else {
11111 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
11112 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
11113 }
11114 }
11115 dc->cp_regs = cpu->cp_regs;
11116 dc->features = env->features;
11117
11118 /* Single step state. The code-generation logic here is:
11119 * SS_ACTIVE == 0:
11120 * generate code with no special handling for single-stepping (except
11121 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11122 * this happens anyway because those changes are all system register or
11123 * PSTATE writes).
11124 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11125 * emit code for one insn
11126 * emit code to clear PSTATE.SS
11127 * emit code to generate software step exception for completed step
11128 * end TB (as usual for having generated an exception)
11129 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11130 * emit code to generate a software step exception
11131 * end the TB
11132 */
11133 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
11134 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
11135 dc->is_ldex = false;
11136
11137 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
11138
11139 /* If architectural single step active, limit to 1. */
11140 if (is_singlestepping(dc)) {
11141 dc->base.max_insns = 1;
11142 }
11143
11144 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11145 to those left on the page. */
11146 if (!dc->thumb) {
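/*
 * -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on
 * the current page; e.g. with 4K pages and pc_first ending in
 * 0xff8 that is 8 bytes, i.e. a bound of 2 A32 insns.
 */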
11147 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11148 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11149 }
11150
11151 cpu_V0 = tcg_temp_new_i64();
11152 cpu_V1 = tcg_temp_new_i64();
11153 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11154 cpu_M0 = tcg_temp_new_i64();
11155 }
11156
11157 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11158 {
11159 DisasContext *dc = container_of(dcbase, DisasContext, base);
11160
11161 /* A note on handling of the condexec (IT) bits:
11162 *
11163 * We want to avoid the overhead of having to write the updated condexec
11164 * bits back to the CPUARMState for every instruction in an IT block. So:
11165 * (1) if the condexec bits are not already zero then we write
11166 * zero back into the CPUARMState now. This avoids complications trying
11167 * to do it at the end of the block. (For example if we don't do this
11168 * it's hard to identify whether we can safely skip writing condexec
11169 * at the end of the TB, which we definitely want to do for the case
11170 * where a TB doesn't do anything with the IT state at all.)
11171 * (2) if we are going to leave the TB then we call gen_set_condexec()
11172 * which will write the correct value into CPUARMState if zero is wrong.
11173 * This is done both for leaving the TB at the end, and for leaving
11174 * it because of an exception we know will happen, which is done in
11175 * gen_exception_insn(). The latter is necessary because we need to
11176 * leave the TB with the PC/IT state just prior to execution of the
11177 * instruction which caused the exception.
11178 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11179 * then the CPUARMState will be wrong and we need to reset it.
11180 * This is handled in the same way as restoration of the
11181 * PC in these situations; we save the value of the condexec bits
11182 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11183 * then uses this to restore them after an exception.
11184 *
11185 * Note that there are no instructions which can read the condexec
11186 * bits, and none which can write non-static values to them, so
11187 * we don't need to care about whether CPUARMState is correct in the
11188 * middle of a TB.
11189 */
11190
11191 /* Reset the conditional execution bits immediately. This avoids
11192 complications trying to do it at the end of the block. */
11193 if (dc->condexec_mask || dc->condexec_cond) {
11194 TCGv_i32 tmp = tcg_temp_new_i32();
11195 tcg_gen_movi_i32(tmp, 0);
11196 store_cpu_field(tmp, condexec_bits);
11197 }
11198 }
11199
11200 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11201 {
11202 DisasContext *dc = container_of(dcbase, DisasContext, base);
11203
11204 tcg_gen_insn_start(dc->base.pc_next,
11205 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11206 0);
11207 dc->insn_start = tcg_last_op();
11208 }
11209
11210 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11211 const CPUBreakpoint *bp)
11212 {
11213 DisasContext *dc = container_of(dcbase, DisasContext, base);
11214
11215 if (bp->flags & BP_CPU) {
11216 gen_set_condexec(dc);
11217 gen_set_pc_im(dc, dc->base.pc_next);
11218 gen_helper_check_breakpoints(cpu_env);
11219 /* End the TB early; it's likely not going to be executed */
11220 dc->base.is_jmp = DISAS_TOO_MANY;
11221 } else {
11222 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11223 /* The address covered by the breakpoint must be
11224 included in [tb->pc, tb->pc + tb->size) in order
10225 for it to be properly cleared -- thus we
11226 increment the PC here so that the logic setting
11227 tb->size below does the right thing. */
11228 /* TODO: Advance PC by correct instruction length to
11229 * avoid disassembler error messages */
11230 dc->base.pc_next += 2;
11231 dc->base.is_jmp = DISAS_NORETURN;
11232 }
11233
11234 return true;
11235 }
11236
11237 static bool arm_pre_translate_insn(DisasContext *dc)
11238 {
11239 #ifdef CONFIG_USER_ONLY
11240 /* Intercept jump to the magic kernel page. */
11241 if (dc->base.pc_next >= 0xffff0000) {
11242 /* We always get here via a jump, so we know we are not in a
11243 conditional execution block. */
11244 gen_exception_internal(EXCP_KERNEL_TRAP);
11245 dc->base.is_jmp = DISAS_NORETURN;
11246 return true;
11247 }
11248 #endif
11249
11250 if (dc->ss_active && !dc->pstate_ss) {
11251 /* Singlestep state is Active-pending.
11252 * If we're in this state at the start of a TB then either
11253 * a) we just took an exception to an EL which is being debugged
11254 * and this is the first insn in the exception handler
11255 * b) debug exceptions were masked and we just unmasked them
11256 * without changing EL (eg by clearing PSTATE.D)
11257 * In either case we're going to take a swstep exception in the
11258 * "did not step an insn" case, and so the syndrome ISV and EX
11259 * bits should be zero.
11260 */
11261 assert(dc->base.num_insns == 1);
11262 gen_swstep_exception(dc, 0, 0);
11263 dc->base.is_jmp = DISAS_NORETURN;
11264 return true;
11265 }
11266
11267 return false;
11268 }
11269
11270 static void arm_post_translate_insn(DisasContext *dc)
11271 {
11272 if (dc->condjmp && !dc->base.is_jmp) {
11273 gen_set_label(dc->condlabel);
11274 dc->condjmp = 0;
11275 }
11276 translator_loop_temp_check(&dc->base);
11277 }
11278
11279 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11280 {
11281 DisasContext *dc = container_of(dcbase, DisasContext, base);
11282 CPUARMState *env = cpu->env_ptr;
11283 unsigned int insn;
11284
11285 if (arm_pre_translate_insn(dc)) {
11286 return;
11287 }
11288
11289 dc->pc_curr = dc->base.pc_next;
11290 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11291 dc->insn = insn;
11292 dc->base.pc_next += 4;
11293 disas_arm_insn(dc, insn);
11294
11295 arm_post_translate_insn(dc);
11296
11297 /* ARM is a fixed-length ISA. We performed the cross-page check
11298 in init_disas_context by adjusting max_insns. */
11299 }
11300
11301 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11302 {
11303 /* Return true if this Thumb insn is always unconditional,
11304 * even inside an IT block. This is true of only a very few
11305 * instructions: BKPT, HLT, and SG.
11306 *
11307 * A larger class of instructions are UNPREDICTABLE if used
11308 * inside an IT block; we do not need to detect those here, because
11309 * what we do by default (perform the cc check and update the IT
11310 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11311 * choice for those situations.
11312 *
11313 * insn is either a 16-bit or a 32-bit instruction; the two are
11314 * distinguishable because for the 16-bit case the top 16 bits
11315 * are zeroes, and that isn't a valid 32-bit encoding.
11316 */
11317 if ((insn & 0xffffff00) == 0xbe00) {
11318 /* BKPT */
11319 return true;
11320 }
11321
11322 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11323 !arm_dc_feature(s, ARM_FEATURE_M)) {
11324 /* HLT: v8A only. This is unconditional even when it is going to
11325 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11326 * For v7 cores this was a plain old undefined encoding and so
11327 * honours its cc check. (We might be using the encoding as
11328 * a semihosting trap, but we don't change the cc check behaviour
11329 * on that account, because a debugger connected to a real v7A
11330 * core and emulating semihosting traps by catching the UNDEF
11331 * exception would also only see cases where the cc check passed.
11332 * No guest code should be trying to do a HLT semihosting trap
11333 * in an IT block anyway.
11334 */
11335 return true;
11336 }
11337
11338 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11339 arm_dc_feature(s, ARM_FEATURE_M)) {
11340 /* SG: v8M only */
11341 return true;
11342 }
11343
11344 return false;
11345 }
11346
11347 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11348 {
11349 DisasContext *dc = container_of(dcbase, DisasContext, base);
11350 CPUARMState *env = cpu->env_ptr;
11351 uint32_t insn;
11352 bool is_16bit;
11353
11354 if (arm_pre_translate_insn(dc)) {
11355 return;
11356 }
11357
11358 dc->pc_curr = dc->base.pc_next;
11359 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11360 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11361 dc->base.pc_next += 2;
11362 if (!is_16bit) {
11363 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11364
11365 insn = insn << 16 | insn2;
11366 dc->base.pc_next += 2;
11367 }
11368 dc->insn = insn;
11369
11370 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11371 uint32_t cond = dc->condexec_cond;
11372
11373 /*
11374 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11375 * "always"; 0xf is not "never".
11376 */
11377 if (cond < 0x0e) {
11378 arm_skip_unless(dc, cond);
11379 }
11380 }
11381
11382 if (is_16bit) {
11383 disas_thumb_insn(dc, insn);
11384 } else {
11385 disas_thumb2_insn(dc, insn);
11386 }
11387
11388 /* Advance the Thumb condexec condition. */
11389 if (dc->condexec_mask) {
11390 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11391 ((dc->condexec_mask >> 4) & 1));
11392 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11393 if (dc->condexec_mask == 0) {
11394 dc->condexec_cond = 0;
11395 }
11396 }
11397
11398 arm_post_translate_insn(dc);
11399
11400 /* Thumb is a variable-length ISA. Stop translation when the next insn
11401 * will touch a new page. This ensures that prefetch aborts occur at
11402 * the right place.
11403 *
11404 * We want to stop the TB if the next insn starts in a new page,
11405 * or if it spans between this page and the next. This means that
11406 * if we're looking at the last halfword in the page we need to
11407 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11408 * or a 32-bit Thumb insn (which won't).
11409 * This is to avoid generating a silly TB with a single 16-bit insn
11410 * in it at the end of this page (which would execute correctly
11411 * but isn't very efficient).
11412 */
11413 if (dc->base.is_jmp == DISAS_NEXT
11414 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11415 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11416 && insn_crosses_page(env, dc)))) {
11417 dc->base.is_jmp = DISAS_TOO_MANY;
11418 }
11419 }
11420
11421 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11422 {
11423 DisasContext *dc = container_of(dcbase, DisasContext, base);
11424
11425 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11426 /* FIXME: This can theoretically happen with self-modifying code. */
11427 cpu_abort(cpu, "IO on conditional branch instruction");
11428 }
11429
11430 /* At this stage dc->condjmp will only be set when the skipped
11431 instruction was a conditional branch or trap, and the PC has
11432 already been written. */
11433 gen_set_condexec(dc);
11434 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11435 /* Exception return branches need some special case code at the
11436 * end of the TB, which is complex enough that it has to
11437 * handle the single-step vs not and the condition-failed
11438 * insn codepath itself.
11439 */
11440 gen_bx_excret_final_code(dc);
11441 } else if (unlikely(is_singlestepping(dc))) {
11442 /* Unconditional and "condition passed" instruction codepath. */
11443 switch (dc->base.is_jmp) {
11444 case DISAS_SWI:
11445 gen_ss_advance(dc);
11446 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11447 default_exception_el(dc));
11448 break;
11449 case DISAS_HVC:
11450 gen_ss_advance(dc);
11451 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11452 break;
11453 case DISAS_SMC:
11454 gen_ss_advance(dc);
11455 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11456 break;
11457 case DISAS_NEXT:
11458 case DISAS_TOO_MANY:
11459 case DISAS_UPDATE:
11460 gen_set_pc_im(dc, dc->base.pc_next);
11461 /* fall through */
11462 default:
11463 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11464 gen_singlestep_exception(dc);
11465 break;
11466 case DISAS_NORETURN:
11467 break;
11468 }
11469 } else {
11470 /* While branches must always occur at the end of an IT block,
11471 there are a few other things that can cause us to terminate
11472 the TB in the middle of an IT block:
11473 - Exception generating instructions (bkpt, swi, undefined).
11474 - Page boundaries.
11475 - Hardware watchpoints.
11476 Hardware breakpoints have already been handled and skip this code.
11477 */
11478 switch (dc->base.is_jmp) {
11479 case DISAS_NEXT:
11480 case DISAS_TOO_MANY:
11481 gen_goto_tb(dc, 1, dc->base.pc_next);
11482 break;
11483 case DISAS_JUMP:
11484 gen_goto_ptr();
11485 break;
11486 case DISAS_UPDATE:
11487 gen_set_pc_im(dc, dc->base.pc_next);
11488 /* fall through */
11489 default:
11490 /* indicate that the hash table must be used to find the next TB */
11491 tcg_gen_exit_tb(NULL, 0);
11492 break;
11493 case DISAS_NORETURN:
11494 /* nothing more to generate */
11495 break;
11496 case DISAS_WFI:
11497 {
11498 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11499 !(dc->insn & (1U << 31))) ? 2 : 4);
11500
11501 gen_helper_wfi(cpu_env, tmp);
11502 tcg_temp_free_i32(tmp);
11503 /* The helper doesn't necessarily throw an exception, but we
11504 * must go back to the main loop to check for interrupts anyway.
11505 */
11506 tcg_gen_exit_tb(NULL, 0);
11507 break;
11508 }
11509 case DISAS_WFE:
11510 gen_helper_wfe(cpu_env);
11511 break;
11512 case DISAS_YIELD:
11513 gen_helper_yield(cpu_env);
11514 break;
11515 case DISAS_SWI:
11516 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11517 default_exception_el(dc));
11518 break;
11519 case DISAS_HVC:
11520 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11521 break;
11522 case DISAS_SMC:
11523 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11524 break;
11525 }
11526 }
11527
11528 if (dc->condjmp) {
11529 /* "Condition failed" instruction codepath for the branch/trap insn */
11530 gen_set_label(dc->condlabel);
11531 gen_set_condexec(dc);
11532 if (unlikely(is_singlestepping(dc))) {
11533 gen_set_pc_im(dc, dc->base.pc_next);
11534 gen_singlestep_exception(dc);
11535 } else {
11536 gen_goto_tb(dc, 1, dc->base.pc_next);
11537 }
11538 }
11539 }
11540
11541 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11542 {
11543 DisasContext *dc = container_of(dcbase, DisasContext, base);
11544
11545 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11546 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11547 }
11548
11549 static const TranslatorOps arm_translator_ops = {
11550 .init_disas_context = arm_tr_init_disas_context,
11551 .tb_start = arm_tr_tb_start,
11552 .insn_start = arm_tr_insn_start,
11553 .breakpoint_check = arm_tr_breakpoint_check,
11554 .translate_insn = arm_tr_translate_insn,
11555 .tb_stop = arm_tr_tb_stop,
11556 .disas_log = arm_tr_disas_log,
11557 };
11558
11559 static const TranslatorOps thumb_translator_ops = {
11560 .init_disas_context = arm_tr_init_disas_context,
11561 .tb_start = arm_tr_tb_start,
11562 .insn_start = arm_tr_insn_start,
11563 .breakpoint_check = arm_tr_breakpoint_check,
11564 .translate_insn = thumb_tr_translate_insn,
11565 .tb_stop = arm_tr_tb_stop,
11566 .disas_log = arm_tr_disas_log,
11567 };
11568
11569 /* generate intermediate code for basic block 'tb'. */
11570 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11571 {
11572 DisasContext dc = { };
11573 const TranslatorOps *ops = &arm_translator_ops;
11574
11575 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11576 ops = &thumb_translator_ops;
11577 }
11578 #ifdef TARGET_AARCH64
11579 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11580 ops = &aarch64_translator_ops;
11581 }
11582 #endif
11583
11584 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11585 }
11586
11587 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11588 target_ulong *data)
11589 {
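/*
 * data[] holds the values recorded by tcg_gen_insn_start() in
 * arm_tr_insn_start(): data[0] is the PC, data[1] the packed
 * condexec (IT) bits and data[2] the (shifted) syndrome
 * information.
 */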
11590 if (is_a64(env)) {
11591 env->pc = data[0];
11592 env->condexec_bits = 0;
11593 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11594 } else {
11595 env->regs[15] = data[0];
11596 env->condexec_bits = data[1];
11597 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11598 }
11599 }