target/arm/translate.c
1 /*
2 * ARM translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "qemu/osdep.h"
22
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36
37 #include "trace-tcg.h"
38 #include "exec/log.h"
39
40
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
51
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
55
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
61
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
68
69 #include "exec/gen-icount.h"
70
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fix point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
83 {
84 int i;
85
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
90 }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101 a64_translate_init();
102 }
103
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
106 */
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
115
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118 {
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
126
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
130 */
131 return;
132 }
133
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
138 */
139 return;
140 }
141
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
145 }
146
147 static inline int get_a32_user_mem_index(DisasContext *s)
148 {
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
153 */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
179 }
180 }
181
182 static inline TCGv_i32 load_cpu_offset(int offset)
183 {
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
187 }
188
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 {
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
195 }
196
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
199
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
202 {
203 return s->pc_curr + (s->thumb ? 4 : 8);
204 }
205
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
208 {
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
213 }
214 }
215
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 {
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
222 }
223
224 /*
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
228 */
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
230 {
231 TCGv_i32 tmp = tcg_temp_new_i32();
232
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
237 }
238 return tmp;
239 }
240
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
244 {
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 */
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
253 }
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
256 }
257
258 /*
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
264 */
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 {
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
270 }
271 #endif
272 store_reg(s, 13, var);
273 }
274
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
283
284
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 {
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
290 }
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293
294 static void gen_exception_internal(int excp)
295 {
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
301 }
302
303 static void gen_step_complete_exception(DisasContext *s)
304 {
305 * We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
313 */
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
317 }
318
319 static void gen_singlestep_exception(DisasContext *s)
320 {
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
324 */
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
329 }
330 }
331
332 static inline bool is_singlestepping(DisasContext *s)
333 {
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
339 */
340 return s->base.singlestep_enabled || s->ss_active;
341 }
342
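/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of the
 * low halfwords and b holds the product of the high halfwords.
 */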
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 {
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
356 }
357
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 {
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
370 }
371
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 {
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
378 }
379
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 {
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
386
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
390
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
395
396 return ret;
397 }
398
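/* Signed 32x32->64 multiply. Marks inputs as dead. */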
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 {
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
404
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
408
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
413
414 return ret;
415 }
416
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
419 {
420 tcg_gen_rotri_i32(var, var, 16);
421 }
422
423 /* Dual 16-bit add. The result is placed in dest; t0 and t1 are clobbered:
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
428 */
429
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 {
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
440 }
441
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
444 {
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
447 }
448
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 {
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
454 }
455
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
462 }
463
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 {
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
476 }
477
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 {
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
497 }
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
504 }
505
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 {
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
519 }
520
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 {
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
528 }
529
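/*
 * Variable shift by register: the movcond selects 0 when the shift amount
 * (t1 & 0xff) is greater than 31, matching the ARM rule that LSL/LSR by
 * 32 or more produces 0.
 */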
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 { \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
544 }
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
548
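/*
 * Variable arithmetic shift right: shift amounts of 32 or more are clamped
 * to 31, which replicates the sign bit as the ARM ASR semantics require.
 */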
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
559 }
560
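/* Set CF to bit SHIFT of var, i.e. the shifter carry-out. */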
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
569 {
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
576 }
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
582 }
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
588 }
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
612 }
613 }
614 }
615
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
618 {
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
625 }
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
639 }
640 }
641 tcg_temp_free_i32(shift);
642 }
643
644 /*
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
647 */
648 void arm_test_cc(DisasCompare *cmp, int cc)
649 {
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
653
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
660
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
666
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
672
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
678
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
689
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
698
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
710
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
718
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
722 }
723
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
726 }
727
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
732 }
733
734 void arm_free_cc(DisasCompare *cmp)
735 {
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
738 }
739 }
740
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 {
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
744 }
745
746 void arm_gen_test_cc(int cc, TCGLabel *label)
747 {
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
752 }
753
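/*
 * Sync the IT-block state into the CPUARMState condexec_bits field;
 * this is a no-op when we are not inside an IT block.
 */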
754 static inline void gen_set_condexec(DisasContext *s)
755 {
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
761 }
762 }
763
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 {
766 tcg_gen_movi_i32(cpu_R[15], val);
767 }
768
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
771 {
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
776 }
777
778 /*
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
791 *
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
799 */
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
801 {
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
804 */
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
810 }
811 #endif
812 }
813
814 static inline void gen_bx_excret_final_code(DisasContext *s)
815 {
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
819
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
826 }
827
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
835 }
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
843 *
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
847 */
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
850 }
851
852 static inline void gen_bxns(DisasContext *s, int rm)
853 {
854 TCGv_i32 var = load_reg(s, rm);
855
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
865 */
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
869 }
870
871 static inline void gen_blxns(DisasContext *s, int rm)
872 {
873 TCGv_i32 var = load_reg(s, rm);
874
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
878 */
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
883 }
884
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
889 {
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
894 }
895 }
896
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
902 {
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
907 }
908 }
909
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
915
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
922 */
923
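/*
 * Widen a 32-bit guest address to the target address type, applying the
 * BE32 sub-word address XOR when SCTLR.B is set.
 */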
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
928
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932 }
933 return addr;
934 }
935
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
938 {
939 TCGv addr;
940
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
944 }
945
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
949 }
950
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
953 {
954 TCGv addr;
955
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
959 }
960
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
964 }
965
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
969 { \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
971 }
972
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
976 { \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
978 }
979
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
981 {
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
985 }
986 }
987
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
990 {
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
995 }
996
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
999 {
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1005 {
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1016 }
1017 tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1022 {
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1038 */
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1045 */
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1055 */
1056 TCGv_i32 tmp;
1057
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1064 }
1065
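/* Generate code to raise internal exception EXCP at address PC,
 * syncing the condexec state and PC first.
 */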
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
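/* Generate code to raise exception EXCP with syndrome SYN, targeting
 * exception level TARGET_EL, after syncing the condexec state and PC.
 */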
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1076 {
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085 TCGv_i32 tcg_syn;
1086
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1122 */
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1130 }
1131
1132 unallocated_encoding(s);
1133 }
1134
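/* Return a pointer to the float_status to use: the Neon "standard FPSCR"
 * status if NEON is nonzero, otherwise the normal VFP fp_status.
 */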
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1143 }
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1146 }
1147
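/* Return the byte offset within CPUARMState of VFP register REG,
 * viewed as a double-precision register if DP is true, else as a
 * single-precision register.
 */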
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1158 }
1159 return ofs;
1160 }
1161 }
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1175 */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1184 */
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1187 }
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
1190 }
1191
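/* Return a new temporary holding the 32-bit word PASS of NEON register REG. */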
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1215 }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1237 }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248 long offset = neon_element_offset(reg, ele, size);
1249
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1262 }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267 long offset = neon_element_offset(reg, ele, size);
1268
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1284 }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345 iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350 iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
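/* Record an iwMMXt wR register update by setting bit 1 (MUP) of wCon;
 * gen_op_iwmmxt_set_cup() below sets bit 0 (CUP) for control register
 * updates in the same way.
 */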
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
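/* Compute the effective address for an iwMMXt load/store into DEST,
 * handling pre/post indexing and base register writeback; returns
 * nonzero for an invalid addressing mode.
 */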
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1483 {
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1487
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1490
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1514 }
1515
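/* Fetch an iwMMXt shift amount into DEST, either from a wCGR control
 * register (insn bit 8 set) or from the low 32 bits of a wR register,
 * masked with MASK; returns nonzero if the register choice is invalid.
 */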
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1520
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1526 }
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531 }
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1546
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1560 }
1561 return 0;
1562 }
1563
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1569 }
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584 }
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591 }
1592 }
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1596 }
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598 }
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612 }
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620 }
1621 }
1622 }
1623 tcg_temp_free_i32(tmp);
1624 }
1625 tcg_temp_free_i32(addr);
1626 return 0;
1627 }
1628
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1631
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1673 }
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1746 }
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1768 }
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802 }
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818 }
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1839 }
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859 }
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1900 }
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923 }
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932 }
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1938 }
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1955 }
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1976 }
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1992 }
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1998 }
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2004 }
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2025 }
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2040 }
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2046 }
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2052 }
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2074 }
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2104 }
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2135 }
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2166 }
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2182 }
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2193 }
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2210 }
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2221 }
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2238 }
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2249 }
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2268 }
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2275 }
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2282 }
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2285 }
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2318 }
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2349 }
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2404 }
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2461 }
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2495 }
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2530 }
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2538 }
2539
2540 return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2549
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2555
2556 if (acc != 0)
2557 return 1;
2558
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2580 }
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2583
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2586 }
2587
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2593
2594 if (acc != 0)
2595 return 1;
2596
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
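/* acc0 is 40 bits wide, so only bits [39:32] are kept in rdhi. */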
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2605 }
2606 return 0;
2607 }
2608
2609 return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
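/*
 * For example, VFP_DREG_D(reg, insn) rebuilds the 5-bit D register index
 * from Vd = insn[15:12] and D = insn[22].  When 32 double registers are
 * present (aa32_simd_r32) it expands to
 *     reg = ((insn >> 12) & 0x0f) | ((insn >> 18) & 0x10);
 * otherwise a set bit 22 makes the insn UNDEF (return 1).
 */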
2626
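/*
 * Duplicate one 16-bit lane across a 32-bit value: gen_neon_dup_low16
 * turns 0x????cdef into 0xcdefcdef, gen_neon_dup_high16 turns 0xabcd????
 * into 0xabcdabcd.
 */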
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2643 }
2644
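/*
 * Direct block chaining (goto_tb) is only used when the destination lies
 * on the same guest page as this TB (which may span two pages), so that a
 * change of the page mapping cannot leave a stale direct jump behind;
 * other destinations go through the indirect lookup in gen_goto_tb() below.
 */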
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
2654
2655 static void gen_goto_ptr(void)
2656 {
2657 tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2687
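/*
 * 16x16->32 signed multiply used by the SMULxy/SMLAxy family: x and y
 * select the top (nonzero) or bottom (zero) halfword of t0 and t1
 * respectively, each sign-extended before the multiply.
 */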
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
2700
2701 /* Return the mask of PSR bits set by a MSR instruction. */
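/*
 * For example, an MSR that selects the c and f fields (flags == 0b1001)
 * produces mask = 0xff0000ff here, before the validity, execution-state
 * and privilege filtering below.
 */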
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2705
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
2718
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2726
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2742
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2767 {
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2782 *
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
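/*
 * For example, MRS r2, SPSR_irq has r == 1, sysm == 0x10, which decodes
 * below to *tgtmode = ARM_CPU_MODE_IRQ, *regno = 16; MRS r2, r13_fiq has
 * r == 0, sysm == 0xd, giving ARM_CPU_MODE_FIQ and *regno = 13.
 */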
2787 int exc_target = default_exception_el(s);
2788
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2796
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2800
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2873
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2905
2906 return true;
2907
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2919
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2922 }
2923
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2941
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2944 }
2945
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3001 }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3011 }
3012 }
3013
3014 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3015 switch ((size << 1) | u) { \
3016 case 0: \
3017 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3018 break; \
3019 case 1: \
3020 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3021 break; \
3022 case 2: \
3023 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 3: \
3026 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 4: \
3029 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 5: \
3032 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 default: return 1; \
3035 }} while (0)
3036
3037 #define GEN_NEON_INTEGER_OP(name) do { \
3038 switch ((size << 1) | u) { \
3039 case 0: \
3040 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3041 break; \
3042 case 1: \
3043 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3044 break; \
3045 case 2: \
3046 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3047 break; \
3048 case 3: \
3049 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3050 break; \
3051 case 4: \
3052 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3053 break; \
3054 case 5: \
3055 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3056 break; \
3057 default: return 1; \
3058 }} while (0)
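/*
 * The case index is (size << 1) | u; e.g. GEN_NEON_INTEGER_OP(hadd) with
 * size == 1 and u == 1 selects case 3 and emits
 * gen_helper_neon_hadd_u16(tmp, tmp, tmp2), while size == 2, u == 0
 * selects the signed 32-bit helper.
 */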
3059
3060 static TCGv_i32 neon_load_scratch(int scratch)
3061 {
3062 TCGv_i32 tmp = tcg_temp_new_i32();
3063 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3064 return tmp;
3065 }
3066
3067 static void neon_store_scratch(int scratch, TCGv_i32 var)
3068 {
3069 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 tcg_temp_free_i32(var);
3071 }
3072
3073 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3074 {
3075 TCGv_i32 tmp;
3076 if (size == 1) {
3077 tmp = neon_load_reg(reg & 7, reg >> 4);
3078 if (reg & 8) {
3079 gen_neon_dup_high16(tmp);
3080 } else {
3081 gen_neon_dup_low16(tmp);
3082 }
3083 } else {
3084 tmp = neon_load_reg(reg & 15, reg >> 4);
3085 }
3086 return tmp;
3087 }
3088
3089 static int gen_neon_unzip(int rd, int rm, int size, int q)
3090 {
3091 TCGv_ptr pd, pm;
3092
3093 if (!q && size == 2) {
3094 return 1;
3095 }
3096 pd = vfp_reg_ptr(true, rd);
3097 pm = vfp_reg_ptr(true, rm);
3098 if (q) {
3099 switch (size) {
3100 case 0:
3101 gen_helper_neon_qunzip8(pd, pm);
3102 break;
3103 case 1:
3104 gen_helper_neon_qunzip16(pd, pm);
3105 break;
3106 case 2:
3107 gen_helper_neon_qunzip32(pd, pm);
3108 break;
3109 default:
3110 abort();
3111 }
3112 } else {
3113 switch (size) {
3114 case 0:
3115 gen_helper_neon_unzip8(pd, pm);
3116 break;
3117 case 1:
3118 gen_helper_neon_unzip16(pd, pm);
3119 break;
3120 default:
3121 abort();
3122 }
3123 }
3124 tcg_temp_free_ptr(pd);
3125 tcg_temp_free_ptr(pm);
3126 return 0;
3127 }
3128
3129 static int gen_neon_zip(int rd, int rm, int size, int q)
3130 {
3131 TCGv_ptr pd, pm;
3132
3133 if (!q && size == 2) {
3134 return 1;
3135 }
3136 pd = vfp_reg_ptr(true, rd);
3137 pm = vfp_reg_ptr(true, rm);
3138 if (q) {
3139 switch (size) {
3140 case 0:
3141 gen_helper_neon_qzip8(pd, pm);
3142 break;
3143 case 1:
3144 gen_helper_neon_qzip16(pd, pm);
3145 break;
3146 case 2:
3147 gen_helper_neon_qzip32(pd, pm);
3148 break;
3149 default:
3150 abort();
3151 }
3152 } else {
3153 switch (size) {
3154 case 0:
3155 gen_helper_neon_zip8(pd, pm);
3156 break;
3157 case 1:
3158 gen_helper_neon_zip16(pd, pm);
3159 break;
3160 default:
3161 abort();
3162 }
3163 }
3164 tcg_temp_free_ptr(pd);
3165 tcg_temp_free_ptr(pm);
3166 return 0;
3167 }
3168
3169 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3170 {
3171 TCGv_i32 rd, tmp;
3172
3173 rd = tcg_temp_new_i32();
3174 tmp = tcg_temp_new_i32();
3175
3176 tcg_gen_shli_i32(rd, t0, 8);
3177 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3178 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3179 tcg_gen_or_i32(rd, rd, tmp);
3180
3181 tcg_gen_shri_i32(t1, t1, 8);
3182 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3183 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3184 tcg_gen_or_i32(t1, t1, tmp);
3185 tcg_gen_mov_i32(t0, rd);
3186
3187 tcg_temp_free_i32(tmp);
3188 tcg_temp_free_i32(rd);
3189 }
3190
3191 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3192 {
3193 TCGv_i32 rd, tmp;
3194
3195 rd = tcg_temp_new_i32();
3196 tmp = tcg_temp_new_i32();
3197
3198 tcg_gen_shli_i32(rd, t0, 16);
3199 tcg_gen_andi_i32(tmp, t1, 0xffff);
3200 tcg_gen_or_i32(rd, rd, tmp);
3201 tcg_gen_shri_i32(t1, t1, 16);
3202 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3203 tcg_gen_or_i32(t1, t1, tmp);
3204 tcg_gen_mov_i32(t0, rd);
3205
3206 tcg_temp_free_i32(tmp);
3207 tcg_temp_free_i32(rd);
3208 }
3209
3210 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3211 {
3212 switch (size) {
3213 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3214 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3215 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3216 default: abort();
3217 }
3218 }
3219
3220 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3221 {
3222 switch (size) {
3223 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3224 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3225 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3226 default: abort();
3227 }
3228 }
3229
3230 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3231 {
3232 switch (size) {
3233 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3234 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3235 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3236 default: abort();
3237 }
3238 }
3239
3240 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3241 {
3242 switch (size) {
3243 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3244 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3245 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3246 default: abort();
3247 }
3248 }
3249
3250 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3251 int q, int u)
3252 {
3253 if (q) {
3254 if (u) {
3255 switch (size) {
3256 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3257 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3258 default: abort();
3259 }
3260 } else {
3261 switch (size) {
3262 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3264 default: abort();
3265 }
3266 }
3267 } else {
3268 if (u) {
3269 switch (size) {
3270 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3271 case 2: gen_ushl_i32(var, var, shift); break;
3272 default: abort();
3273 }
3274 } else {
3275 switch (size) {
3276 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3277 case 2: gen_sshl_i32(var, var, shift); break;
3278 default: abort();
3279 }
3280 }
3281 }
3282 }
3283
3284 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3285 {
3286 if (u) {
3287 switch (size) {
3288 case 0: gen_helper_neon_widen_u8(dest, src); break;
3289 case 1: gen_helper_neon_widen_u16(dest, src); break;
3290 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3291 default: abort();
3292 }
3293 } else {
3294 switch (size) {
3295 case 0: gen_helper_neon_widen_s8(dest, src); break;
3296 case 1: gen_helper_neon_widen_s16(dest, src); break;
3297 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3298 default: abort();
3299 }
3300 }
3301 tcg_temp_free_i32(src);
3302 }
3303
3304 static inline void gen_neon_addl(int size)
3305 {
3306 switch (size) {
3307 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3308 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3309 case 2: tcg_gen_add_i64(CPU_V001); break;
3310 default: abort();
3311 }
3312 }
3313
3314 static inline void gen_neon_subl(int size)
3315 {
3316 switch (size) {
3317 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3318 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3319 case 2: tcg_gen_sub_i64(CPU_V001); break;
3320 default: abort();
3321 }
3322 }
3323
3324 static inline void gen_neon_negl(TCGv_i64 var, int size)
3325 {
3326 switch (size) {
3327 case 0: gen_helper_neon_negl_u16(var, var); break;
3328 case 1: gen_helper_neon_negl_u32(var, var); break;
3329 case 2:
3330 tcg_gen_neg_i64(var, var);
3331 break;
3332 default: abort();
3333 }
3334 }
3335
3336 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3337 {
3338 switch (size) {
3339 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3340 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3341 default: abort();
3342 }
3343 }
3344
3345 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3346 int size, int u)
3347 {
3348 TCGv_i64 tmp;
3349
3350 switch ((size << 1) | u) {
3351 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3352 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3353 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3354 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3355 case 4:
3356 tmp = gen_muls_i64_i32(a, b);
3357 tcg_gen_mov_i64(dest, tmp);
3358 tcg_temp_free_i64(tmp);
3359 break;
3360 case 5:
3361 tmp = gen_mulu_i64_i32(a, b);
3362 tcg_gen_mov_i64(dest, tmp);
3363 tcg_temp_free_i64(tmp);
3364 break;
3365 default: abort();
3366 }
3367
3368 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3369 Don't forget to clean them now. */
3370 if (size < 2) {
3371 tcg_temp_free_i32(a);
3372 tcg_temp_free_i32(b);
3373 }
3374 }
3375
3376 static void gen_neon_narrow_op(int op, int u, int size,
3377 TCGv_i32 dest, TCGv_i64 src)
3378 {
3379 if (op) {
3380 if (u) {
3381 gen_neon_unarrow_sats(size, dest, src);
3382 } else {
3383 gen_neon_narrow(size, dest, src);
3384 }
3385 } else {
3386 if (u) {
3387 gen_neon_narrow_satu(size, dest, src);
3388 } else {
3389 gen_neon_narrow_sats(size, dest, src);
3390 }
3391 }
3392 }
3393
3394 /* Symbolic constants for op fields for Neon 3-register same-length.
3395 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3396 * table A7-9.
3397 */
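/*
 * That is, op = (insn bits [11:8] << 1) | bit [4]; e.g. NEON_3R_VSHL (8)
 * is bits [11:8] = 0b0100 with bit [4] = 0, and NEON_3R_VQADD (1) is
 * bits [11:8] = 0b0000 with bit [4] = 1.
 */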
3398 #define NEON_3R_VHADD 0
3399 #define NEON_3R_VQADD 1
3400 #define NEON_3R_VRHADD 2
3401 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3402 #define NEON_3R_VHSUB 4
3403 #define NEON_3R_VQSUB 5
3404 #define NEON_3R_VCGT 6
3405 #define NEON_3R_VCGE 7
3406 #define NEON_3R_VSHL 8
3407 #define NEON_3R_VQSHL 9
3408 #define NEON_3R_VRSHL 10
3409 #define NEON_3R_VQRSHL 11
3410 #define NEON_3R_VMAX 12
3411 #define NEON_3R_VMIN 13
3412 #define NEON_3R_VABD 14
3413 #define NEON_3R_VABA 15
3414 #define NEON_3R_VADD_VSUB 16
3415 #define NEON_3R_VTST_VCEQ 17
3416 #define NEON_3R_VML 18 /* VMLA, VMLS */
3417 #define NEON_3R_VMUL 19
3418 #define NEON_3R_VPMAX 20
3419 #define NEON_3R_VPMIN 21
3420 #define NEON_3R_VQDMULH_VQRDMULH 22
3421 #define NEON_3R_VPADD_VQRDMLAH 23
3422 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3423 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3424 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3425 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3426 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3427 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3428 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3429 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3430
3431 static const uint8_t neon_3r_sizes[] = {
3432 [NEON_3R_VHADD] = 0x7,
3433 [NEON_3R_VQADD] = 0xf,
3434 [NEON_3R_VRHADD] = 0x7,
3435 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3436 [NEON_3R_VHSUB] = 0x7,
3437 [NEON_3R_VQSUB] = 0xf,
3438 [NEON_3R_VCGT] = 0x7,
3439 [NEON_3R_VCGE] = 0x7,
3440 [NEON_3R_VSHL] = 0xf,
3441 [NEON_3R_VQSHL] = 0xf,
3442 [NEON_3R_VRSHL] = 0xf,
3443 [NEON_3R_VQRSHL] = 0xf,
3444 [NEON_3R_VMAX] = 0x7,
3445 [NEON_3R_VMIN] = 0x7,
3446 [NEON_3R_VABD] = 0x7,
3447 [NEON_3R_VABA] = 0x7,
3448 [NEON_3R_VADD_VSUB] = 0xf,
3449 [NEON_3R_VTST_VCEQ] = 0x7,
3450 [NEON_3R_VML] = 0x7,
3451 [NEON_3R_VMUL] = 0x7,
3452 [NEON_3R_VPMAX] = 0x7,
3453 [NEON_3R_VPMIN] = 0x7,
3454 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3455 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3456 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3457 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3458 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3459 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3460 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3461 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3462 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3463 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3464 };
3465
3466 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3467 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3468 * table A7-13.
3469 */
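/*
 * That is, op = (insn bits [17:16] << 4) | bits [10:7]; e.g.
 * NEON_2RM_VSWP (32) is bits [17:16] = 0b10 with bits [10:7] = 0b0000.
 */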
3470 #define NEON_2RM_VREV64 0
3471 #define NEON_2RM_VREV32 1
3472 #define NEON_2RM_VREV16 2
3473 #define NEON_2RM_VPADDL 4
3474 #define NEON_2RM_VPADDL_U 5
3475 #define NEON_2RM_AESE 6 /* Includes AESD */
3476 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3477 #define NEON_2RM_VCLS 8
3478 #define NEON_2RM_VCLZ 9
3479 #define NEON_2RM_VCNT 10
3480 #define NEON_2RM_VMVN 11
3481 #define NEON_2RM_VPADAL 12
3482 #define NEON_2RM_VPADAL_U 13
3483 #define NEON_2RM_VQABS 14
3484 #define NEON_2RM_VQNEG 15
3485 #define NEON_2RM_VCGT0 16
3486 #define NEON_2RM_VCGE0 17
3487 #define NEON_2RM_VCEQ0 18
3488 #define NEON_2RM_VCLE0 19
3489 #define NEON_2RM_VCLT0 20
3490 #define NEON_2RM_SHA1H 21
3491 #define NEON_2RM_VABS 22
3492 #define NEON_2RM_VNEG 23
3493 #define NEON_2RM_VCGT0_F 24
3494 #define NEON_2RM_VCGE0_F 25
3495 #define NEON_2RM_VCEQ0_F 26
3496 #define NEON_2RM_VCLE0_F 27
3497 #define NEON_2RM_VCLT0_F 28
3498 #define NEON_2RM_VABS_F 30
3499 #define NEON_2RM_VNEG_F 31
3500 #define NEON_2RM_VSWP 32
3501 #define NEON_2RM_VTRN 33
3502 #define NEON_2RM_VUZP 34
3503 #define NEON_2RM_VZIP 35
3504 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3505 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3506 #define NEON_2RM_VSHLL 38
3507 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3508 #define NEON_2RM_VRINTN 40
3509 #define NEON_2RM_VRINTX 41
3510 #define NEON_2RM_VRINTA 42
3511 #define NEON_2RM_VRINTZ 43
3512 #define NEON_2RM_VCVT_F16_F32 44
3513 #define NEON_2RM_VRINTM 45
3514 #define NEON_2RM_VCVT_F32_F16 46
3515 #define NEON_2RM_VRINTP 47
3516 #define NEON_2RM_VCVTAU 48
3517 #define NEON_2RM_VCVTAS 49
3518 #define NEON_2RM_VCVTNU 50
3519 #define NEON_2RM_VCVTNS 51
3520 #define NEON_2RM_VCVTPU 52
3521 #define NEON_2RM_VCVTPS 53
3522 #define NEON_2RM_VCVTMU 54
3523 #define NEON_2RM_VCVTMS 55
3524 #define NEON_2RM_VRECPE 56
3525 #define NEON_2RM_VRSQRTE 57
3526 #define NEON_2RM_VRECPE_F 58
3527 #define NEON_2RM_VRSQRTE_F 59
3528 #define NEON_2RM_VCVT_FS 60
3529 #define NEON_2RM_VCVT_FU 61
3530 #define NEON_2RM_VCVT_SF 62
3531 #define NEON_2RM_VCVT_UF 63
3532
3533 static bool neon_2rm_is_v8_op(int op)
3534 {
3535 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3536 switch (op) {
3537 case NEON_2RM_VRINTN:
3538 case NEON_2RM_VRINTA:
3539 case NEON_2RM_VRINTM:
3540 case NEON_2RM_VRINTP:
3541 case NEON_2RM_VRINTZ:
3542 case NEON_2RM_VRINTX:
3543 case NEON_2RM_VCVTAU:
3544 case NEON_2RM_VCVTAS:
3545 case NEON_2RM_VCVTNU:
3546 case NEON_2RM_VCVTNS:
3547 case NEON_2RM_VCVTPU:
3548 case NEON_2RM_VCVTPS:
3549 case NEON_2RM_VCVTMU:
3550 case NEON_2RM_VCVTMS:
3551 return true;
3552 default:
3553 return false;
3554 }
3555 }
3556
3557 /* Each entry in this array has bit n set if the insn allows
3558 * size value n (otherwise it will UNDEF). Since unallocated
3559 * op values will have no bits set they always UNDEF.
3560 */
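/*
 * For example, [NEON_2RM_VREV32] = 0x3 allows only size 0 (bytes) and
 * size 1 (halfwords), while [NEON_2RM_SHA1H] = 0x4 allows size 2 alone.
 */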
3561 static const uint8_t neon_2rm_sizes[] = {
3562 [NEON_2RM_VREV64] = 0x7,
3563 [NEON_2RM_VREV32] = 0x3,
3564 [NEON_2RM_VREV16] = 0x1,
3565 [NEON_2RM_VPADDL] = 0x7,
3566 [NEON_2RM_VPADDL_U] = 0x7,
3567 [NEON_2RM_AESE] = 0x1,
3568 [NEON_2RM_AESMC] = 0x1,
3569 [NEON_2RM_VCLS] = 0x7,
3570 [NEON_2RM_VCLZ] = 0x7,
3571 [NEON_2RM_VCNT] = 0x1,
3572 [NEON_2RM_VMVN] = 0x1,
3573 [NEON_2RM_VPADAL] = 0x7,
3574 [NEON_2RM_VPADAL_U] = 0x7,
3575 [NEON_2RM_VQABS] = 0x7,
3576 [NEON_2RM_VQNEG] = 0x7,
3577 [NEON_2RM_VCGT0] = 0x7,
3578 [NEON_2RM_VCGE0] = 0x7,
3579 [NEON_2RM_VCEQ0] = 0x7,
3580 [NEON_2RM_VCLE0] = 0x7,
3581 [NEON_2RM_VCLT0] = 0x7,
3582 [NEON_2RM_SHA1H] = 0x4,
3583 [NEON_2RM_VABS] = 0x7,
3584 [NEON_2RM_VNEG] = 0x7,
3585 [NEON_2RM_VCGT0_F] = 0x4,
3586 [NEON_2RM_VCGE0_F] = 0x4,
3587 [NEON_2RM_VCEQ0_F] = 0x4,
3588 [NEON_2RM_VCLE0_F] = 0x4,
3589 [NEON_2RM_VCLT0_F] = 0x4,
3590 [NEON_2RM_VABS_F] = 0x4,
3591 [NEON_2RM_VNEG_F] = 0x4,
3592 [NEON_2RM_VSWP] = 0x1,
3593 [NEON_2RM_VTRN] = 0x7,
3594 [NEON_2RM_VUZP] = 0x7,
3595 [NEON_2RM_VZIP] = 0x7,
3596 [NEON_2RM_VMOVN] = 0x7,
3597 [NEON_2RM_VQMOVN] = 0x7,
3598 [NEON_2RM_VSHLL] = 0x7,
3599 [NEON_2RM_SHA1SU1] = 0x4,
3600 [NEON_2RM_VRINTN] = 0x4,
3601 [NEON_2RM_VRINTX] = 0x4,
3602 [NEON_2RM_VRINTA] = 0x4,
3603 [NEON_2RM_VRINTZ] = 0x4,
3604 [NEON_2RM_VCVT_F16_F32] = 0x2,
3605 [NEON_2RM_VRINTM] = 0x4,
3606 [NEON_2RM_VCVT_F32_F16] = 0x2,
3607 [NEON_2RM_VRINTP] = 0x4,
3608 [NEON_2RM_VCVTAU] = 0x4,
3609 [NEON_2RM_VCVTAS] = 0x4,
3610 [NEON_2RM_VCVTNU] = 0x4,
3611 [NEON_2RM_VCVTNS] = 0x4,
3612 [NEON_2RM_VCVTPU] = 0x4,
3613 [NEON_2RM_VCVTPS] = 0x4,
3614 [NEON_2RM_VCVTMU] = 0x4,
3615 [NEON_2RM_VCVTMS] = 0x4,
3616 [NEON_2RM_VRECPE] = 0x4,
3617 [NEON_2RM_VRSQRTE] = 0x4,
3618 [NEON_2RM_VRECPE_F] = 0x4,
3619 [NEON_2RM_VRSQRTE_F] = 0x4,
3620 [NEON_2RM_VCVT_FS] = 0x4,
3621 [NEON_2RM_VCVT_FU] = 0x4,
3622 [NEON_2RM_VCVT_SF] = 0x4,
3623 [NEON_2RM_VCVT_UF] = 0x4,
3624 };
3625
3626 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3627 uint32_t opr_sz, uint32_t max_sz,
3628 gen_helper_gvec_3_ptr *fn)
3629 {
3630 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3631
3632 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3633 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3634 opr_sz, max_sz, 0, fn);
3635 tcg_temp_free_ptr(qc_ptr);
3636 }
3637
3638 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3639 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3640 {
3641 static gen_helper_gvec_3_ptr * const fns[2] = {
3642 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3643 };
3644 tcg_debug_assert(vece >= 1 && vece <= 2);
3645 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3646 }
3647
3648 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3649 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3650 {
3651 static gen_helper_gvec_3_ptr * const fns[2] = {
3652 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3653 };
3654 tcg_debug_assert(vece >= 1 && vece <= 2);
3655 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3656 }
3657
3658 #define GEN_CMP0(NAME, COND) \
3659 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3660 { \
3661 tcg_gen_setcondi_i32(COND, d, a, 0); \
3662 tcg_gen_neg_i32(d, d); \
3663 } \
3664 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3665 { \
3666 tcg_gen_setcondi_i64(COND, d, a, 0); \
3667 tcg_gen_neg_i64(d, d); \
3668 } \
3669 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3670 { \
3671 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3672 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3673 tcg_temp_free_vec(zero); \
3674 } \
3675 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3676 uint32_t opr_sz, uint32_t max_sz) \
3677 { \
3678 const GVecGen2 op[4] = { \
3679 { .fno = gen_helper_gvec_##NAME##0_b, \
3680 .fniv = gen_##NAME##0_vec, \
3681 .opt_opc = vecop_list_cmp, \
3682 .vece = MO_8 }, \
3683 { .fno = gen_helper_gvec_##NAME##0_h, \
3684 .fniv = gen_##NAME##0_vec, \
3685 .opt_opc = vecop_list_cmp, \
3686 .vece = MO_16 }, \
3687 { .fni4 = gen_##NAME##0_i32, \
3688 .fniv = gen_##NAME##0_vec, \
3689 .opt_opc = vecop_list_cmp, \
3690 .vece = MO_32 }, \
3691 { .fni8 = gen_##NAME##0_i64, \
3692 .fniv = gen_##NAME##0_vec, \
3693 .opt_opc = vecop_list_cmp, \
3694 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3695 .vece = MO_64 }, \
3696 }; \
3697 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3698 }
3699
3700 static const TCGOpcode vecop_list_cmp[] = {
3701 INDEX_op_cmp_vec, 0
3702 };
3703
3704 GEN_CMP0(ceq, TCG_COND_EQ)
3705 GEN_CMP0(cle, TCG_COND_LE)
3706 GEN_CMP0(cge, TCG_COND_GE)
3707 GEN_CMP0(clt, TCG_COND_LT)
3708 GEN_CMP0(cgt, TCG_COND_GT)
3709
3710 #undef GEN_CMP0
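/*
 * Each GEN_CMP0 instantiation above provides gen_<name>0_i32/_i64/_vec
 * plus the gen_gvec_<name>0() expander; the setcond+neg pairing turns the
 * 0/1 comparison result into the all-zeros/all-ones element value the
 * Neon compare instructions produce.
 */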
3711
3712 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3713 {
3714 tcg_gen_vec_sar8i_i64(a, a, shift);
3715 tcg_gen_vec_add8_i64(d, d, a);
3716 }
3717
3718 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3719 {
3720 tcg_gen_vec_sar16i_i64(a, a, shift);
3721 tcg_gen_vec_add16_i64(d, d, a);
3722 }
3723
3724 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3725 {
3726 tcg_gen_sari_i32(a, a, shift);
3727 tcg_gen_add_i32(d, d, a);
3728 }
3729
3730 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3731 {
3732 tcg_gen_sari_i64(a, a, shift);
3733 tcg_gen_add_i64(d, d, a);
3734 }
3735
3736 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3737 {
3738 tcg_gen_sari_vec(vece, a, a, sh);
3739 tcg_gen_add_vec(vece, d, d, a);
3740 }
3741
3742 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3743 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3744 {
3745 static const TCGOpcode vecop_list[] = {
3746 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3747 };
3748 static const GVecGen2i ops[4] = {
3749 { .fni8 = gen_ssra8_i64,
3750 .fniv = gen_ssra_vec,
3751 .fno = gen_helper_gvec_ssra_b,
3752 .load_dest = true,
3753 .opt_opc = vecop_list,
3754 .vece = MO_8 },
3755 { .fni8 = gen_ssra16_i64,
3756 .fniv = gen_ssra_vec,
3757 .fno = gen_helper_gvec_ssra_h,
3758 .load_dest = true,
3759 .opt_opc = vecop_list,
3760 .vece = MO_16 },
3761 { .fni4 = gen_ssra32_i32,
3762 .fniv = gen_ssra_vec,
3763 .fno = gen_helper_gvec_ssra_s,
3764 .load_dest = true,
3765 .opt_opc = vecop_list,
3766 .vece = MO_32 },
3767 { .fni8 = gen_ssra64_i64,
3768 .fniv = gen_ssra_vec,
3769 .fno = gen_helper_gvec_ssra_d,
3770 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3771 .opt_opc = vecop_list,
3772 .load_dest = true,
3773 .vece = MO_64 },
3774 };
3775
3776 /* tszimm encoding produces immediates in the range [1..esize]. */
3777 tcg_debug_assert(shift > 0);
3778 tcg_debug_assert(shift <= (8 << vece));
3779
3780 /*
3781 * Shifts larger than the element size are architecturally valid.
3782 * A signed shift of esize or more yields all sign bits, so clamp the shift to esize - 1.
3783 */
3784 shift = MIN(shift, (8 << vece) - 1);
3785 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3786 }
3787
3788 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3789 {
3790 tcg_gen_vec_shr8i_i64(a, a, shift);
3791 tcg_gen_vec_add8_i64(d, d, a);
3792 }
3793
3794 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3795 {
3796 tcg_gen_vec_shr16i_i64(a, a, shift);
3797 tcg_gen_vec_add16_i64(d, d, a);
3798 }
3799
3800 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3801 {
3802 tcg_gen_shri_i32(a, a, shift);
3803 tcg_gen_add_i32(d, d, a);
3804 }
3805
3806 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3807 {
3808 tcg_gen_shri_i64(a, a, shift);
3809 tcg_gen_add_i64(d, d, a);
3810 }
3811
3812 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3813 {
3814 tcg_gen_shri_vec(vece, a, a, sh);
3815 tcg_gen_add_vec(vece, d, d, a);
3816 }
3817
3818 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3819 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3820 {
3821 static const TCGOpcode vecop_list[] = {
3822 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3823 };
3824 static const GVecGen2i ops[4] = {
3825 { .fni8 = gen_usra8_i64,
3826 .fniv = gen_usra_vec,
3827 .fno = gen_helper_gvec_usra_b,
3828 .load_dest = true,
3829 .opt_opc = vecop_list,
3830 .vece = MO_8, },
3831 { .fni8 = gen_usra16_i64,
3832 .fniv = gen_usra_vec,
3833 .fno = gen_helper_gvec_usra_h,
3834 .load_dest = true,
3835 .opt_opc = vecop_list,
3836 .vece = MO_16, },
3837 { .fni4 = gen_usra32_i32,
3838 .fniv = gen_usra_vec,
3839 .fno = gen_helper_gvec_usra_s,
3840 .load_dest = true,
3841 .opt_opc = vecop_list,
3842 .vece = MO_32, },
3843 { .fni8 = gen_usra64_i64,
3844 .fniv = gen_usra_vec,
3845 .fno = gen_helper_gvec_usra_d,
3846 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3847 .load_dest = true,
3848 .opt_opc = vecop_list,
3849 .vece = MO_64, },
3850 };
3851
3852 /* tszimm encoding produces immediates in the range [1..esize]. */
3853 tcg_debug_assert(shift > 0);
3854 tcg_debug_assert(shift <= (8 << vece));
3855
3856 /*
3857 * Shifts larger than the element size are architecturally valid.
3858 * An unsigned shift of that size yields all zeros, so the accumulate is a nop.
3859 */
3860 if (shift < (8 << vece)) {
3861 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3862 } else {
3863 /* Nop, but we do need to clear the tail. */
3864 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3865 }
3866 }
3867
3868 /*
3869 * Shift one less than the requested amount, and the low bit is
3870 * the rounding bit. For the 8 and 16-bit operations, because we
3871 * mask the low bit, we can perform a normal integer shift instead
3872 * of a vector shift.
3873 */
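/*
 * As a sketch of the identity used below:
 *   srshr(x, sh) == (x >> sh) + ((x >> (sh - 1)) & 1)
 * e.g. srshr(7, 2) == (7 >> 2) + ((7 >> 1) & 1) == 1 + 1 == 2,
 * i.e. 7/4 rounded to nearest.
 */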
3874 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3875 {
3876 TCGv_i64 t = tcg_temp_new_i64();
3877
3878 tcg_gen_shri_i64(t, a, sh - 1);
3879 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3880 tcg_gen_vec_sar8i_i64(d, a, sh);
3881 tcg_gen_vec_add8_i64(d, d, t);
3882 tcg_temp_free_i64(t);
3883 }
3884
3885 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3886 {
3887 TCGv_i64 t = tcg_temp_new_i64();
3888
3889 tcg_gen_shri_i64(t, a, sh - 1);
3890 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3891 tcg_gen_vec_sar16i_i64(d, a, sh);
3892 tcg_gen_vec_add16_i64(d, d, t);
3893 tcg_temp_free_i64(t);
3894 }
3895
3896 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3897 {
3898 TCGv_i32 t = tcg_temp_new_i32();
3899
3900 tcg_gen_extract_i32(t, a, sh - 1, 1);
3901 tcg_gen_sari_i32(d, a, sh);
3902 tcg_gen_add_i32(d, d, t);
3903 tcg_temp_free_i32(t);
3904 }
3905
3906 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3907 {
3908 TCGv_i64 t = tcg_temp_new_i64();
3909
3910 tcg_gen_extract_i64(t, a, sh - 1, 1);
3911 tcg_gen_sari_i64(d, a, sh);
3912 tcg_gen_add_i64(d, d, t);
3913 tcg_temp_free_i64(t);
3914 }
3915
3916 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3917 {
3918 TCGv_vec t = tcg_temp_new_vec_matching(d);
3919 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3920
3921 tcg_gen_shri_vec(vece, t, a, sh - 1);
3922 tcg_gen_dupi_vec(vece, ones, 1);
3923 tcg_gen_and_vec(vece, t, t, ones);
3924 tcg_gen_sari_vec(vece, d, a, sh);
3925 tcg_gen_add_vec(vece, d, d, t);
3926
3927 tcg_temp_free_vec(t);
3928 tcg_temp_free_vec(ones);
3929 }
3930
3931 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3932 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3933 {
3934 static const TCGOpcode vecop_list[] = {
3935 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3936 };
3937 static const GVecGen2i ops[4] = {
3938 { .fni8 = gen_srshr8_i64,
3939 .fniv = gen_srshr_vec,
3940 .fno = gen_helper_gvec_srshr_b,
3941 .opt_opc = vecop_list,
3942 .vece = MO_8 },
3943 { .fni8 = gen_srshr16_i64,
3944 .fniv = gen_srshr_vec,
3945 .fno = gen_helper_gvec_srshr_h,
3946 .opt_opc = vecop_list,
3947 .vece = MO_16 },
3948 { .fni4 = gen_srshr32_i32,
3949 .fniv = gen_srshr_vec,
3950 .fno = gen_helper_gvec_srshr_s,
3951 .opt_opc = vecop_list,
3952 .vece = MO_32 },
3953 { .fni8 = gen_srshr64_i64,
3954 .fniv = gen_srshr_vec,
3955 .fno = gen_helper_gvec_srshr_d,
3956 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3957 .opt_opc = vecop_list,
3958 .vece = MO_64 },
3959 };
3960
3961 /* tszimm encoding produces immediates in the range [1..esize] */
3962 tcg_debug_assert(shift > 0);
3963 tcg_debug_assert(shift <= (8 << vece));
3964
3965 if (shift == (8 << vece)) {
3966 /*
3967 * Shifts larger than the element size are architecturally valid.
3968 * Signed results in all sign bits. With rounding, this produces
3969 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3970 * I.e. always zero.
3971 */
3972 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3973 } else {
3974 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3975 }
3976 }
3977
3978 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3979 {
3980 TCGv_i64 t = tcg_temp_new_i64();
3981
3982 gen_srshr8_i64(t, a, sh);
3983 tcg_gen_vec_add8_i64(d, d, t);
3984 tcg_temp_free_i64(t);
3985 }
3986
3987 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3988 {
3989 TCGv_i64 t = tcg_temp_new_i64();
3990
3991 gen_srshr16_i64(t, a, sh);
3992 tcg_gen_vec_add16_i64(d, d, t);
3993 tcg_temp_free_i64(t);
3994 }
3995
3996 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3997 {
3998 TCGv_i32 t = tcg_temp_new_i32();
3999
4000 gen_srshr32_i32(t, a, sh);
4001 tcg_gen_add_i32(d, d, t);
4002 tcg_temp_free_i32(t);
4003 }
4004
4005 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4006 {
4007 TCGv_i64 t = tcg_temp_new_i64();
4008
4009 gen_srshr64_i64(t, a, sh);
4010 tcg_gen_add_i64(d, d, t);
4011 tcg_temp_free_i64(t);
4012 }
4013
4014 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4015 {
4016 TCGv_vec t = tcg_temp_new_vec_matching(d);
4017
4018 gen_srshr_vec(vece, t, a, sh);
4019 tcg_gen_add_vec(vece, d, d, t);
4020 tcg_temp_free_vec(t);
4021 }
4022
4023 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4024 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4025 {
4026 static const TCGOpcode vecop_list[] = {
4027 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4028 };
4029 static const GVecGen2i ops[4] = {
4030 { .fni8 = gen_srsra8_i64,
4031 .fniv = gen_srsra_vec,
4032 .fno = gen_helper_gvec_srsra_b,
4033 .opt_opc = vecop_list,
4034 .load_dest = true,
4035 .vece = MO_8 },
4036 { .fni8 = gen_srsra16_i64,
4037 .fniv = gen_srsra_vec,
4038 .fno = gen_helper_gvec_srsra_h,
4039 .opt_opc = vecop_list,
4040 .load_dest = true,
4041 .vece = MO_16 },
4042 { .fni4 = gen_srsra32_i32,
4043 .fniv = gen_srsra_vec,
4044 .fno = gen_helper_gvec_srsra_s,
4045 .opt_opc = vecop_list,
4046 .load_dest = true,
4047 .vece = MO_32 },
4048 { .fni8 = gen_srsra64_i64,
4049 .fniv = gen_srsra_vec,
4050 .fno = gen_helper_gvec_srsra_d,
4051 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4052 .opt_opc = vecop_list,
4053 .load_dest = true,
4054 .vece = MO_64 },
4055 };
4056
4057 /* tszimm encoding produces immediates in the range [1..esize] */
4058 tcg_debug_assert(shift > 0);
4059 tcg_debug_assert(shift <= (8 << vece));
4060
4061 /*
4062 * Shifts larger than the element size are architecturally valid.
4063 * Signed results in all sign bits. With rounding, this produces
4064 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4065 * I.e. always zero. With accumulation, this leaves D unchanged.
4066 */
4067 if (shift == (8 << vece)) {
4068 /* Nop, but we do need to clear the tail. */
4069 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4070 } else {
4071 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4072 }
4073 }
4074
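/*
 * URSHR (unsigned rounding shift right) reuses the same trick as
 * SRSHR above: take the rounding bit from a shift by (sh - 1) and
 * add it to the logically shifted result.
 */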
4075 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4076 {
4077 TCGv_i64 t = tcg_temp_new_i64();
4078
4079 tcg_gen_shri_i64(t, a, sh - 1);
4080 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4081 tcg_gen_vec_shr8i_i64(d, a, sh);
4082 tcg_gen_vec_add8_i64(d, d, t);
4083 tcg_temp_free_i64(t);
4084 }
4085
4086 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4087 {
4088 TCGv_i64 t = tcg_temp_new_i64();
4089
4090 tcg_gen_shri_i64(t, a, sh - 1);
4091 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4092 tcg_gen_vec_shr16i_i64(d, a, sh);
4093 tcg_gen_vec_add16_i64(d, d, t);
4094 tcg_temp_free_i64(t);
4095 }
4096
4097 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4098 {
4099 TCGv_i32 t = tcg_temp_new_i32();
4100
4101 tcg_gen_extract_i32(t, a, sh - 1, 1);
4102 tcg_gen_shri_i32(d, a, sh);
4103 tcg_gen_add_i32(d, d, t);
4104 tcg_temp_free_i32(t);
4105 }
4106
4107 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4108 {
4109 TCGv_i64 t = tcg_temp_new_i64();
4110
4111 tcg_gen_extract_i64(t, a, sh - 1, 1);
4112 tcg_gen_shri_i64(d, a, sh);
4113 tcg_gen_add_i64(d, d, t);
4114 tcg_temp_free_i64(t);
4115 }
4116
4117 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4118 {
4119 TCGv_vec t = tcg_temp_new_vec_matching(d);
4120 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4121
4122 tcg_gen_shri_vec(vece, t, a, shift - 1);
4123 tcg_gen_dupi_vec(vece, ones, 1);
4124 tcg_gen_and_vec(vece, t, t, ones);
4125 tcg_gen_shri_vec(vece, d, a, shift);
4126 tcg_gen_add_vec(vece, d, d, t);
4127
4128 tcg_temp_free_vec(t);
4129 tcg_temp_free_vec(ones);
4130 }
4131
4132 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4133 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4134 {
4135 static const TCGOpcode vecop_list[] = {
4136 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4137 };
4138 static const GVecGen2i ops[4] = {
4139 { .fni8 = gen_urshr8_i64,
4140 .fniv = gen_urshr_vec,
4141 .fno = gen_helper_gvec_urshr_b,
4142 .opt_opc = vecop_list,
4143 .vece = MO_8 },
4144 { .fni8 = gen_urshr16_i64,
4145 .fniv = gen_urshr_vec,
4146 .fno = gen_helper_gvec_urshr_h,
4147 .opt_opc = vecop_list,
4148 .vece = MO_16 },
4149 { .fni4 = gen_urshr32_i32,
4150 .fniv = gen_urshr_vec,
4151 .fno = gen_helper_gvec_urshr_s,
4152 .opt_opc = vecop_list,
4153 .vece = MO_32 },
4154 { .fni8 = gen_urshr64_i64,
4155 .fniv = gen_urshr_vec,
4156 .fno = gen_helper_gvec_urshr_d,
4157 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4158 .opt_opc = vecop_list,
4159 .vece = MO_64 },
4160 };
4161
4162 /* tszimm encoding produces immediates in the range [1..esize] */
4163 tcg_debug_assert(shift > 0);
4164 tcg_debug_assert(shift <= (8 << vece));
4165
4166 if (shift == (8 << vece)) {
4167 /*
4168 * Shifts larger than the element size are architecturally valid.
4169 * Unsigned results in zero. With rounding, this produces a
4170 * copy of the most significant bit.
4171 */
4172 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4173 } else {
4174 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4175 }
4176 }
4177
4178 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4179 {
4180 TCGv_i64 t = tcg_temp_new_i64();
4181
4182 if (sh == 8) {
4183 tcg_gen_vec_shr8i_i64(t, a, 7);
4184 } else {
4185 gen_urshr8_i64(t, a, sh);
4186 }
4187 tcg_gen_vec_add8_i64(d, d, t);
4188 tcg_temp_free_i64(t);
4189 }
4190
4191 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4192 {
4193 TCGv_i64 t = tcg_temp_new_i64();
4194
4195 if (sh == 16) {
4196 tcg_gen_vec_shr16i_i64(t, a, 15);
4197 } else {
4198 gen_urshr16_i64(t, a, sh);
4199 }
4200 tcg_gen_vec_add16_i64(d, d, t);
4201 tcg_temp_free_i64(t);
4202 }
4203
4204 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4205 {
4206 TCGv_i32 t = tcg_temp_new_i32();
4207
4208 if (sh == 32) {
4209 tcg_gen_shri_i32(t, a, 31);
4210 } else {
4211 gen_urshr32_i32(t, a, sh);
4212 }
4213 tcg_gen_add_i32(d, d, t);
4214 tcg_temp_free_i32(t);
4215 }
4216
4217 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4218 {
4219 TCGv_i64 t = tcg_temp_new_i64();
4220
4221 if (sh == 64) {
4222 tcg_gen_shri_i64(t, a, 63);
4223 } else {
4224 gen_urshr64_i64(t, a, sh);
4225 }
4226 tcg_gen_add_i64(d, d, t);
4227 tcg_temp_free_i64(t);
4228 }
4229
4230 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4231 {
4232 TCGv_vec t = tcg_temp_new_vec_matching(d);
4233
4234 if (sh == (8 << vece)) {
4235 tcg_gen_shri_vec(vece, t, a, sh - 1);
4236 } else {
4237 gen_urshr_vec(vece, t, a, sh);
4238 }
4239 tcg_gen_add_vec(vece, d, d, t);
4240 tcg_temp_free_vec(t);
4241 }
4242
4243 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4244 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4245 {
4246 static const TCGOpcode vecop_list[] = {
4247 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4248 };
4249 static const GVecGen2i ops[4] = {
4250 { .fni8 = gen_ursra8_i64,
4251 .fniv = gen_ursra_vec,
4252 .fno = gen_helper_gvec_ursra_b,
4253 .opt_opc = vecop_list,
4254 .load_dest = true,
4255 .vece = MO_8 },
4256 { .fni8 = gen_ursra16_i64,
4257 .fniv = gen_ursra_vec,
4258 .fno = gen_helper_gvec_ursra_h,
4259 .opt_opc = vecop_list,
4260 .load_dest = true,
4261 .vece = MO_16 },
4262 { .fni4 = gen_ursra32_i32,
4263 .fniv = gen_ursra_vec,
4264 .fno = gen_helper_gvec_ursra_s,
4265 .opt_opc = vecop_list,
4266 .load_dest = true,
4267 .vece = MO_32 },
4268 { .fni8 = gen_ursra64_i64,
4269 .fniv = gen_ursra_vec,
4270 .fno = gen_helper_gvec_ursra_d,
4271 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4272 .opt_opc = vecop_list,
4273 .load_dest = true,
4274 .vece = MO_64 },
4275 };
4276
4277 /* tszimm encoding produces immediates in the range [1..esize] */
4278 tcg_debug_assert(shift > 0);
4279 tcg_debug_assert(shift <= (8 << vece));
4280
4281 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4282 }
4283
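/*
 * SRI (shift right and insert): roughly, the low (esize - shift) bits
 * of each destination element are replaced by (a >> shift), while the
 * top 'shift' bits of the destination are preserved.
 */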
4284 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4285 {
4286 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4287 TCGv_i64 t = tcg_temp_new_i64();
4288
4289 tcg_gen_shri_i64(t, a, shift);
4290 tcg_gen_andi_i64(t, t, mask);
4291 tcg_gen_andi_i64(d, d, ~mask);
4292 tcg_gen_or_i64(d, d, t);
4293 tcg_temp_free_i64(t);
4294 }
4295
4296 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4297 {
4298 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4299 TCGv_i64 t = tcg_temp_new_i64();
4300
4301 tcg_gen_shri_i64(t, a, shift);
4302 tcg_gen_andi_i64(t, t, mask);
4303 tcg_gen_andi_i64(d, d, ~mask);
4304 tcg_gen_or_i64(d, d, t);
4305 tcg_temp_free_i64(t);
4306 }
4307
4308 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4309 {
4310 tcg_gen_shri_i32(a, a, shift);
4311 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4312 }
4313
4314 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4315 {
4316 tcg_gen_shri_i64(a, a, shift);
4317 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4318 }
4319
4320 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4321 {
4322 TCGv_vec t = tcg_temp_new_vec_matching(d);
4323 TCGv_vec m = tcg_temp_new_vec_matching(d);
4324
4325 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4326 tcg_gen_shri_vec(vece, t, a, sh);
4327 tcg_gen_and_vec(vece, d, d, m);
4328 tcg_gen_or_vec(vece, d, d, t);
4329
4330 tcg_temp_free_vec(t);
4331 tcg_temp_free_vec(m);
4332 }
4333
4334 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4335 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4336 {
4337 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4338 const GVecGen2i ops[4] = {
4339 { .fni8 = gen_shr8_ins_i64,
4340 .fniv = gen_shr_ins_vec,
4341 .fno = gen_helper_gvec_sri_b,
4342 .load_dest = true,
4343 .opt_opc = vecop_list,
4344 .vece = MO_8 },
4345 { .fni8 = gen_shr16_ins_i64,
4346 .fniv = gen_shr_ins_vec,
4347 .fno = gen_helper_gvec_sri_h,
4348 .load_dest = true,
4349 .opt_opc = vecop_list,
4350 .vece = MO_16 },
4351 { .fni4 = gen_shr32_ins_i32,
4352 .fniv = gen_shr_ins_vec,
4353 .fno = gen_helper_gvec_sri_s,
4354 .load_dest = true,
4355 .opt_opc = vecop_list,
4356 .vece = MO_32 },
4357 { .fni8 = gen_shr64_ins_i64,
4358 .fniv = gen_shr_ins_vec,
4359 .fno = gen_helper_gvec_sri_d,
4360 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4361 .load_dest = true,
4362 .opt_opc = vecop_list,
4363 .vece = MO_64 },
4364 };
4365
4366 /* tszimm encoding produces immediates in the range [1..esize]. */
4367 tcg_debug_assert(shift > 0);
4368 tcg_debug_assert(shift <= (8 << vece));
4369
4370 /* Shift of esize leaves destination unchanged. */
4371 if (shift < (8 << vece)) {
4372 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4373 } else {
4374 /* Nop, but we do need to clear the tail. */
4375 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4376 }
4377 }
4378
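/*
 * SLI (shift left and insert): the mirror image of SRI above; the top
 * (esize - shift) bits of each destination element are replaced by
 * (a << shift), while the low 'shift' bits are preserved.
 */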
4379 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4380 {
4381 uint64_t mask = dup_const(MO_8, 0xff << shift);
4382 TCGv_i64 t = tcg_temp_new_i64();
4383
4384 tcg_gen_shli_i64(t, a, shift);
4385 tcg_gen_andi_i64(t, t, mask);
4386 tcg_gen_andi_i64(d, d, ~mask);
4387 tcg_gen_or_i64(d, d, t);
4388 tcg_temp_free_i64(t);
4389 }
4390
4391 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4392 {
4393 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4394 TCGv_i64 t = tcg_temp_new_i64();
4395
4396 tcg_gen_shli_i64(t, a, shift);
4397 tcg_gen_andi_i64(t, t, mask);
4398 tcg_gen_andi_i64(d, d, ~mask);
4399 tcg_gen_or_i64(d, d, t);
4400 tcg_temp_free_i64(t);
4401 }
4402
4403 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4404 {
4405 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4406 }
4407
4408 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4409 {
4410 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4411 }
4412
4413 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4414 {
4415 TCGv_vec t = tcg_temp_new_vec_matching(d);
4416 TCGv_vec m = tcg_temp_new_vec_matching(d);
4417
4418 tcg_gen_shli_vec(vece, t, a, sh);
4419 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4420 tcg_gen_and_vec(vece, d, d, m);
4421 tcg_gen_or_vec(vece, d, d, t);
4422
4423 tcg_temp_free_vec(t);
4424 tcg_temp_free_vec(m);
4425 }
4426
4427 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4428 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4429 {
4430 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4431 const GVecGen2i ops[4] = {
4432 { .fni8 = gen_shl8_ins_i64,
4433 .fniv = gen_shl_ins_vec,
4434 .fno = gen_helper_gvec_sli_b,
4435 .load_dest = true,
4436 .opt_opc = vecop_list,
4437 .vece = MO_8 },
4438 { .fni8 = gen_shl16_ins_i64,
4439 .fniv = gen_shl_ins_vec,
4440 .fno = gen_helper_gvec_sli_h,
4441 .load_dest = true,
4442 .opt_opc = vecop_list,
4443 .vece = MO_16 },
4444 { .fni4 = gen_shl32_ins_i32,
4445 .fniv = gen_shl_ins_vec,
4446 .fno = gen_helper_gvec_sli_s,
4447 .load_dest = true,
4448 .opt_opc = vecop_list,
4449 .vece = MO_32 },
4450 { .fni8 = gen_shl64_ins_i64,
4451 .fniv = gen_shl_ins_vec,
4452 .fno = gen_helper_gvec_sli_d,
4453 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4454 .load_dest = true,
4455 .opt_opc = vecop_list,
4456 .vece = MO_64 },
4457 };
4458
4459 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4460 tcg_debug_assert(shift >= 0);
4461 tcg_debug_assert(shift < (8 << vece));
4462
4463 if (shift == 0) {
4464 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4465 } else {
4466 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4467 }
4468 }
4469
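/*
 * Multiply-accumulate helpers: each element computes d = d + a * b
 * (MLA) or d = d - a * b (MLS). The scalar forms below clobber 'a'
 * with the product to avoid a temporary.
 */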
4470 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4471 {
4472 gen_helper_neon_mul_u8(a, a, b);
4473 gen_helper_neon_add_u8(d, d, a);
4474 }
4475
4476 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4477 {
4478 gen_helper_neon_mul_u8(a, a, b);
4479 gen_helper_neon_sub_u8(d, d, a);
4480 }
4481
4482 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4483 {
4484 gen_helper_neon_mul_u16(a, a, b);
4485 gen_helper_neon_add_u16(d, d, a);
4486 }
4487
4488 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4489 {
4490 gen_helper_neon_mul_u16(a, a, b);
4491 gen_helper_neon_sub_u16(d, d, a);
4492 }
4493
4494 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4495 {
4496 tcg_gen_mul_i32(a, a, b);
4497 tcg_gen_add_i32(d, d, a);
4498 }
4499
4500 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4501 {
4502 tcg_gen_mul_i32(a, a, b);
4503 tcg_gen_sub_i32(d, d, a);
4504 }
4505
4506 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507 {
4508 tcg_gen_mul_i64(a, a, b);
4509 tcg_gen_add_i64(d, d, a);
4510 }
4511
4512 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4513 {
4514 tcg_gen_mul_i64(a, a, b);
4515 tcg_gen_sub_i64(d, d, a);
4516 }
4517
4518 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4519 {
4520 tcg_gen_mul_vec(vece, a, a, b);
4521 tcg_gen_add_vec(vece, d, d, a);
4522 }
4523
4524 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4525 {
4526 tcg_gen_mul_vec(vece, a, a, b);
4527 tcg_gen_sub_vec(vece, d, d, a);
4528 }
4529
4530 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4531 * these tables are shared with AArch64 which does support them.
4532 */
4533 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4534 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4535 {
4536 static const TCGOpcode vecop_list[] = {
4537 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4538 };
4539 static const GVecGen3 ops[4] = {
4540 { .fni4 = gen_mla8_i32,
4541 .fniv = gen_mla_vec,
4542 .load_dest = true,
4543 .opt_opc = vecop_list,
4544 .vece = MO_8 },
4545 { .fni4 = gen_mla16_i32,
4546 .fniv = gen_mla_vec,
4547 .load_dest = true,
4548 .opt_opc = vecop_list,
4549 .vece = MO_16 },
4550 { .fni4 = gen_mla32_i32,
4551 .fniv = gen_mla_vec,
4552 .load_dest = true,
4553 .opt_opc = vecop_list,
4554 .vece = MO_32 },
4555 { .fni8 = gen_mla64_i64,
4556 .fniv = gen_mla_vec,
4557 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4558 .load_dest = true,
4559 .opt_opc = vecop_list,
4560 .vece = MO_64 },
4561 };
4562 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4563 }
4564
4565 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4566 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4567 {
4568 static const TCGOpcode vecop_list[] = {
4569 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4570 };
4571 static const GVecGen3 ops[4] = {
4572 { .fni4 = gen_mls8_i32,
4573 .fniv = gen_mls_vec,
4574 .load_dest = true,
4575 .opt_opc = vecop_list,
4576 .vece = MO_8 },
4577 { .fni4 = gen_mls16_i32,
4578 .fniv = gen_mls_vec,
4579 .load_dest = true,
4580 .opt_opc = vecop_list,
4581 .vece = MO_16 },
4582 { .fni4 = gen_mls32_i32,
4583 .fniv = gen_mls_vec,
4584 .load_dest = true,
4585 .opt_opc = vecop_list,
4586 .vece = MO_32 },
4587 { .fni8 = gen_mls64_i64,
4588 .fniv = gen_mls_vec,
4589 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4590 .load_dest = true,
4591 .opt_opc = vecop_list,
4592 .vece = MO_64 },
4593 };
4594 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4595 }
4596
4597 /* CMTST : test is "if ((X & Y) != 0)". */
4598 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4599 {
4600 tcg_gen_and_i32(d, a, b);
4601 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4602 tcg_gen_neg_i32(d, d);
4603 }
4604
4605 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4606 {
4607 tcg_gen_and_i64(d, a, b);
4608 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4609 tcg_gen_neg_i64(d, d);
4610 }
4611
4612 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4613 {
4614 tcg_gen_and_vec(vece, d, a, b);
4615 tcg_gen_dupi_vec(vece, a, 0);
4616 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4617 }
4618
4619 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4620 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4621 {
4622 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4623 static const GVecGen3 ops[4] = {
4624 { .fni4 = gen_helper_neon_tst_u8,
4625 .fniv = gen_cmtst_vec,
4626 .opt_opc = vecop_list,
4627 .vece = MO_8 },
4628 { .fni4 = gen_helper_neon_tst_u16,
4629 .fniv = gen_cmtst_vec,
4630 .opt_opc = vecop_list,
4631 .vece = MO_16 },
4632 { .fni4 = gen_cmtst_i32,
4633 .fniv = gen_cmtst_vec,
4634 .opt_opc = vecop_list,
4635 .vece = MO_32 },
4636 { .fni8 = gen_cmtst_i64,
4637 .fniv = gen_cmtst_vec,
4638 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4639 .opt_opc = vecop_list,
4640 .vece = MO_64 },
4641 };
4642 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4643 }
4644
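/*
 * USHL takes a signed, per-element shift count: a positive count
 * shifts left, a negative count shifts right, and a magnitude of
 * esize or more produces zero. Only the low byte of the count is
 * significant, hence the 8-bit sign extension below.
 */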
4645 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4646 {
4647 TCGv_i32 lval = tcg_temp_new_i32();
4648 TCGv_i32 rval = tcg_temp_new_i32();
4649 TCGv_i32 lsh = tcg_temp_new_i32();
4650 TCGv_i32 rsh = tcg_temp_new_i32();
4651 TCGv_i32 zero = tcg_const_i32(0);
4652 TCGv_i32 max = tcg_const_i32(32);
4653
4654 /*
4655 * Rely on the TCG guarantee that out of range shifts produce
4656 * unspecified results, not undefined behaviour (i.e. no trap).
4657 * Discard out-of-range results after the fact.
4658 */
4659 tcg_gen_ext8s_i32(lsh, shift);
4660 tcg_gen_neg_i32(rsh, lsh);
4661 tcg_gen_shl_i32(lval, src, lsh);
4662 tcg_gen_shr_i32(rval, src, rsh);
4663 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4664 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4665
4666 tcg_temp_free_i32(lval);
4667 tcg_temp_free_i32(rval);
4668 tcg_temp_free_i32(lsh);
4669 tcg_temp_free_i32(rsh);
4670 tcg_temp_free_i32(zero);
4671 tcg_temp_free_i32(max);
4672 }
4673
4674 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4675 {
4676 TCGv_i64 lval = tcg_temp_new_i64();
4677 TCGv_i64 rval = tcg_temp_new_i64();
4678 TCGv_i64 lsh = tcg_temp_new_i64();
4679 TCGv_i64 rsh = tcg_temp_new_i64();
4680 TCGv_i64 zero = tcg_const_i64(0);
4681 TCGv_i64 max = tcg_const_i64(64);
4682
4683 /*
4684 * Rely on the TCG guarantee that out of range shifts produce
4685 * unspecified results, not undefined behaviour (i.e. no trap).
4686 * Discard out-of-range results after the fact.
4687 */
4688 tcg_gen_ext8s_i64(lsh, shift);
4689 tcg_gen_neg_i64(rsh, lsh);
4690 tcg_gen_shl_i64(lval, src, lsh);
4691 tcg_gen_shr_i64(rval, src, rsh);
4692 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4693 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4694
4695 tcg_temp_free_i64(lval);
4696 tcg_temp_free_i64(rval);
4697 tcg_temp_free_i64(lsh);
4698 tcg_temp_free_i64(rsh);
4699 tcg_temp_free_i64(zero);
4700 tcg_temp_free_i64(max);
4701 }
4702
4703 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4704 TCGv_vec src, TCGv_vec shift)
4705 {
4706 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4707 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4708 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4709 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4710 TCGv_vec msk, max;
4711
4712 tcg_gen_neg_vec(vece, rsh, shift);
4713 if (vece == MO_8) {
4714 tcg_gen_mov_vec(lsh, shift);
4715 } else {
4716 msk = tcg_temp_new_vec_matching(dst);
4717 tcg_gen_dupi_vec(vece, msk, 0xff);
4718 tcg_gen_and_vec(vece, lsh, shift, msk);
4719 tcg_gen_and_vec(vece, rsh, rsh, msk);
4720 tcg_temp_free_vec(msk);
4721 }
4722
4723 /*
4724 * Rely on the TCG guarantee that out of range shifts produce
4725 * unspecified results, not undefined behaviour (i.e. no trap).
4726 * Discard out-of-range results after the fact.
4727 */
4728 tcg_gen_shlv_vec(vece, lval, src, lsh);
4729 tcg_gen_shrv_vec(vece, rval, src, rsh);
4730
4731 max = tcg_temp_new_vec_matching(dst);
4732 tcg_gen_dupi_vec(vece, max, 8 << vece);
4733
4734 /*
4735 * The choice of LT (signed) and GEU (unsigned) is biased toward
4736 * the instructions of the x86_64 host. For MO_8, the whole byte
4737 * is significant so we must use an unsigned compare; otherwise we
4738 * have already masked to a byte and so a signed compare works.
4739 * Other tcg hosts have a full set of comparisons and do not care.
4740 */
4741 if (vece == MO_8) {
4742 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4743 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4744 tcg_gen_andc_vec(vece, lval, lval, lsh);
4745 tcg_gen_andc_vec(vece, rval, rval, rsh);
4746 } else {
4747 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4748 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4749 tcg_gen_and_vec(vece, lval, lval, lsh);
4750 tcg_gen_and_vec(vece, rval, rval, rsh);
4751 }
4752 tcg_gen_or_vec(vece, dst, lval, rval);
4753
4754 tcg_temp_free_vec(max);
4755 tcg_temp_free_vec(lval);
4756 tcg_temp_free_vec(rval);
4757 tcg_temp_free_vec(lsh);
4758 tcg_temp_free_vec(rsh);
4759 }
4760
4761 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4762 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4763 {
4764 static const TCGOpcode vecop_list[] = {
4765 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4766 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4767 };
4768 static const GVecGen3 ops[4] = {
4769 { .fniv = gen_ushl_vec,
4770 .fno = gen_helper_gvec_ushl_b,
4771 .opt_opc = vecop_list,
4772 .vece = MO_8 },
4773 { .fniv = gen_ushl_vec,
4774 .fno = gen_helper_gvec_ushl_h,
4775 .opt_opc = vecop_list,
4776 .vece = MO_16 },
4777 { .fni4 = gen_ushl_i32,
4778 .fniv = gen_ushl_vec,
4779 .opt_opc = vecop_list,
4780 .vece = MO_32 },
4781 { .fni8 = gen_ushl_i64,
4782 .fniv = gen_ushl_vec,
4783 .opt_opc = vecop_list,
4784 .vece = MO_64 },
4785 };
4786 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4787 }
4788
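/*
 * SSHL is the signed counterpart: a negative count performs an
 * arithmetic shift right, clamped so that right shifts of esize or
 * more replicate the sign bit rather than producing zero.
 */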
4789 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4790 {
4791 TCGv_i32 lval = tcg_temp_new_i32();
4792 TCGv_i32 rval = tcg_temp_new_i32();
4793 TCGv_i32 lsh = tcg_temp_new_i32();
4794 TCGv_i32 rsh = tcg_temp_new_i32();
4795 TCGv_i32 zero = tcg_const_i32(0);
4796 TCGv_i32 max = tcg_const_i32(31);
4797
4798 /*
4799 * Rely on the TCG guarantee that out of range shifts produce
4800 * unspecified results, not undefined behaviour (i.e. no trap).
4801 * Discard out-of-range results after the fact.
4802 */
4803 tcg_gen_ext8s_i32(lsh, shift);
4804 tcg_gen_neg_i32(rsh, lsh);
4805 tcg_gen_shl_i32(lval, src, lsh);
4806 tcg_gen_umin_i32(rsh, rsh, max);
4807 tcg_gen_sar_i32(rval, src, rsh);
4808 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4809 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4810
4811 tcg_temp_free_i32(lval);
4812 tcg_temp_free_i32(rval);
4813 tcg_temp_free_i32(lsh);
4814 tcg_temp_free_i32(rsh);
4815 tcg_temp_free_i32(zero);
4816 tcg_temp_free_i32(max);
4817 }
4818
4819 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4820 {
4821 TCGv_i64 lval = tcg_temp_new_i64();
4822 TCGv_i64 rval = tcg_temp_new_i64();
4823 TCGv_i64 lsh = tcg_temp_new_i64();
4824 TCGv_i64 rsh = tcg_temp_new_i64();
4825 TCGv_i64 zero = tcg_const_i64(0);
4826 TCGv_i64 max = tcg_const_i64(63);
4827
4828 /*
4829 * Rely on the TCG guarantee that out of range shifts produce
4830 * unspecified results, not undefined behaviour (i.e. no trap).
4831 * Discard out-of-range results after the fact.
4832 */
4833 tcg_gen_ext8s_i64(lsh, shift);
4834 tcg_gen_neg_i64(rsh, lsh);
4835 tcg_gen_shl_i64(lval, src, lsh);
4836 tcg_gen_umin_i64(rsh, rsh, max);
4837 tcg_gen_sar_i64(rval, src, rsh);
4838 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4839 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4840
4841 tcg_temp_free_i64(lval);
4842 tcg_temp_free_i64(rval);
4843 tcg_temp_free_i64(lsh);
4844 tcg_temp_free_i64(rsh);
4845 tcg_temp_free_i64(zero);
4846 tcg_temp_free_i64(max);
4847 }
4848
4849 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4850 TCGv_vec src, TCGv_vec shift)
4851 {
4852 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4853 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4854 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4855 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4856 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4857
4858 /*
4859 * Rely on the TCG guarantee that out of range shifts produce
4860 * unspecified results, not undefined behaviour (i.e. no trap).
4861 * Discard out-of-range results after the fact.
4862 */
4863 tcg_gen_neg_vec(vece, rsh, shift);
4864 if (vece == MO_8) {
4865 tcg_gen_mov_vec(lsh, shift);
4866 } else {
4867 tcg_gen_dupi_vec(vece, tmp, 0xff);
4868 tcg_gen_and_vec(vece, lsh, shift, tmp);
4869 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4870 }
4871
4872 /* Clamp rsh so an out-of-range right shift yields just the sign (0 or -1). */
4873 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4874 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4875 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4876
4877 tcg_gen_shlv_vec(vece, lval, src, lsh);
4878 tcg_gen_sarv_vec(vece, rval, src, rsh);
4879
4880 /* Select in-bound left shift. */
4881 tcg_gen_andc_vec(vece, lval, lval, tmp);
4882
4883 /* Select between left and right shift. */
4884 if (vece == MO_8) {
4885 tcg_gen_dupi_vec(vece, tmp, 0);
4886 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4887 } else {
4888 tcg_gen_dupi_vec(vece, tmp, 0x80);
4889 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4890 }
4891
4892 tcg_temp_free_vec(lval);
4893 tcg_temp_free_vec(rval);
4894 tcg_temp_free_vec(lsh);
4895 tcg_temp_free_vec(rsh);
4896 tcg_temp_free_vec(tmp);
4897 }
4898
4899 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4900 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4901 {
4902 static const TCGOpcode vecop_list[] = {
4903 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4904 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4905 };
4906 static const GVecGen3 ops[4] = {
4907 { .fniv = gen_sshl_vec,
4908 .fno = gen_helper_gvec_sshl_b,
4909 .opt_opc = vecop_list,
4910 .vece = MO_8 },
4911 { .fniv = gen_sshl_vec,
4912 .fno = gen_helper_gvec_sshl_h,
4913 .opt_opc = vecop_list,
4914 .vece = MO_16 },
4915 { .fni4 = gen_sshl_i32,
4916 .fniv = gen_sshl_vec,
4917 .opt_opc = vecop_list,
4918 .vece = MO_32 },
4919 { .fni8 = gen_sshl_i64,
4920 .fniv = gen_sshl_vec,
4921 .opt_opc = vecop_list,
4922 .vece = MO_64 },
4923 };
4924 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4925 }
4926
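/*
 * Saturating add/sub with QC: compute both the saturating and the
 * wrapping result, compare them, and OR any difference into the
 * cumulative saturation flag (vfp.qc), passed as the extra 'sat'
 * operand via write_aofs.
 */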
4927 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4928 TCGv_vec a, TCGv_vec b)
4929 {
4930 TCGv_vec x = tcg_temp_new_vec_matching(t);
4931 tcg_gen_add_vec(vece, x, a, b);
4932 tcg_gen_usadd_vec(vece, t, a, b);
4933 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4934 tcg_gen_or_vec(vece, sat, sat, x);
4935 tcg_temp_free_vec(x);
4936 }
4937
4938 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4939 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4940 {
4941 static const TCGOpcode vecop_list[] = {
4942 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4943 };
4944 static const GVecGen4 ops[4] = {
4945 { .fniv = gen_uqadd_vec,
4946 .fno = gen_helper_gvec_uqadd_b,
4947 .write_aofs = true,
4948 .opt_opc = vecop_list,
4949 .vece = MO_8 },
4950 { .fniv = gen_uqadd_vec,
4951 .fno = gen_helper_gvec_uqadd_h,
4952 .write_aofs = true,
4953 .opt_opc = vecop_list,
4954 .vece = MO_16 },
4955 { .fniv = gen_uqadd_vec,
4956 .fno = gen_helper_gvec_uqadd_s,
4957 .write_aofs = true,
4958 .opt_opc = vecop_list,
4959 .vece = MO_32 },
4960 { .fniv = gen_uqadd_vec,
4961 .fno = gen_helper_gvec_uqadd_d,
4962 .write_aofs = true,
4963 .opt_opc = vecop_list,
4964 .vece = MO_64 },
4965 };
4966 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4967 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4968 }
4969
4970 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4971 TCGv_vec a, TCGv_vec b)
4972 {
4973 TCGv_vec x = tcg_temp_new_vec_matching(t);
4974 tcg_gen_add_vec(vece, x, a, b);
4975 tcg_gen_ssadd_vec(vece, t, a, b);
4976 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4977 tcg_gen_or_vec(vece, sat, sat, x);
4978 tcg_temp_free_vec(x);
4979 }
4980
4981 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4982 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4983 {
4984 static const TCGOpcode vecop_list[] = {
4985 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4986 };
4987 static const GVecGen4 ops[4] = {
4988 { .fniv = gen_sqadd_vec,
4989 .fno = gen_helper_gvec_sqadd_b,
4990 .opt_opc = vecop_list,
4991 .write_aofs = true,
4992 .vece = MO_8 },
4993 { .fniv = gen_sqadd_vec,
4994 .fno = gen_helper_gvec_sqadd_h,
4995 .opt_opc = vecop_list,
4996 .write_aofs = true,
4997 .vece = MO_16 },
4998 { .fniv = gen_sqadd_vec,
4999 .fno = gen_helper_gvec_sqadd_s,
5000 .opt_opc = vecop_list,
5001 .write_aofs = true,
5002 .vece = MO_32 },
5003 { .fniv = gen_sqadd_vec,
5004 .fno = gen_helper_gvec_sqadd_d,
5005 .opt_opc = vecop_list,
5006 .write_aofs = true,
5007 .vece = MO_64 },
5008 };
5009 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5010 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5011 }
5012
5013 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5014 TCGv_vec a, TCGv_vec b)
5015 {
5016 TCGv_vec x = tcg_temp_new_vec_matching(t);
5017 tcg_gen_sub_vec(vece, x, a, b);
5018 tcg_gen_ussub_vec(vece, t, a, b);
5019 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5020 tcg_gen_or_vec(vece, sat, sat, x);
5021 tcg_temp_free_vec(x);
5022 }
5023
5024 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5025 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5026 {
5027 static const TCGOpcode vecop_list[] = {
5028 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5029 };
5030 static const GVecGen4 ops[4] = {
5031 { .fniv = gen_uqsub_vec,
5032 .fno = gen_helper_gvec_uqsub_b,
5033 .opt_opc = vecop_list,
5034 .write_aofs = true,
5035 .vece = MO_8 },
5036 { .fniv = gen_uqsub_vec,
5037 .fno = gen_helper_gvec_uqsub_h,
5038 .opt_opc = vecop_list,
5039 .write_aofs = true,
5040 .vece = MO_16 },
5041 { .fniv = gen_uqsub_vec,
5042 .fno = gen_helper_gvec_uqsub_s,
5043 .opt_opc = vecop_list,
5044 .write_aofs = true,
5045 .vece = MO_32 },
5046 { .fniv = gen_uqsub_vec,
5047 .fno = gen_helper_gvec_uqsub_d,
5048 .opt_opc = vecop_list,
5049 .write_aofs = true,
5050 .vece = MO_64 },
5051 };
5052 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5053 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5054 }
5055
5056 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5057 TCGv_vec a, TCGv_vec b)
5058 {
5059 TCGv_vec x = tcg_temp_new_vec_matching(t);
5060 tcg_gen_sub_vec(vece, x, a, b);
5061 tcg_gen_sssub_vec(vece, t, a, b);
5062 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5063 tcg_gen_or_vec(vece, sat, sat, x);
5064 tcg_temp_free_vec(x);
5065 }
5066
5067 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5068 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5069 {
5070 static const TCGOpcode vecop_list[] = {
5071 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5072 };
5073 static const GVecGen4 ops[4] = {
5074 { .fniv = gen_sqsub_vec,
5075 .fno = gen_helper_gvec_sqsub_b,
5076 .opt_opc = vecop_list,
5077 .write_aofs = true,
5078 .vece = MO_8 },
5079 { .fniv = gen_sqsub_vec,
5080 .fno = gen_helper_gvec_sqsub_h,
5081 .opt_opc = vecop_list,
5082 .write_aofs = true,
5083 .vece = MO_16 },
5084 { .fniv = gen_sqsub_vec,
5085 .fno = gen_helper_gvec_sqsub_s,
5086 .opt_opc = vecop_list,
5087 .write_aofs = true,
5088 .vece = MO_32 },
5089 { .fniv = gen_sqsub_vec,
5090 .fno = gen_helper_gvec_sqsub_d,
5091 .opt_opc = vecop_list,
5092 .write_aofs = true,
5093 .vece = MO_64 },
5094 };
5095 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5096 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5097 }
5098
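/*
 * SABD (signed absolute difference): each element computes |a - b|.
 * The vector form uses smax(a, b) - smin(a, b); the scalar forms
 * select between (a - b) and (b - a) with a movcond.
 */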
5099 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5100 {
5101 TCGv_i32 t = tcg_temp_new_i32();
5102
5103 tcg_gen_sub_i32(t, a, b);
5104 tcg_gen_sub_i32(d, b, a);
5105 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
5106 tcg_temp_free_i32(t);
5107 }
5108
5109 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5110 {
5111 TCGv_i64 t = tcg_temp_new_i64();
5112
5113 tcg_gen_sub_i64(t, a, b);
5114 tcg_gen_sub_i64(d, b, a);
5115 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
5116 tcg_temp_free_i64(t);
5117 }
5118
5119 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5120 {
5121 TCGv_vec t = tcg_temp_new_vec_matching(d);
5122
5123 tcg_gen_smin_vec(vece, t, a, b);
5124 tcg_gen_smax_vec(vece, d, a, b);
5125 tcg_gen_sub_vec(vece, d, d, t);
5126 tcg_temp_free_vec(t);
5127 }
5128
5129 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5130 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5131 {
5132 static const TCGOpcode vecop_list[] = {
5133 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5134 };
5135 static const GVecGen3 ops[4] = {
5136 { .fniv = gen_sabd_vec,
5137 .fno = gen_helper_gvec_sabd_b,
5138 .opt_opc = vecop_list,
5139 .vece = MO_8 },
5140 { .fniv = gen_sabd_vec,
5141 .fno = gen_helper_gvec_sabd_h,
5142 .opt_opc = vecop_list,
5143 .vece = MO_16 },
5144 { .fni4 = gen_sabd_i32,
5145 .fniv = gen_sabd_vec,
5146 .fno = gen_helper_gvec_sabd_s,
5147 .opt_opc = vecop_list,
5148 .vece = MO_32 },
5149 { .fni8 = gen_sabd_i64,
5150 .fniv = gen_sabd_vec,
5151 .fno = gen_helper_gvec_sabd_d,
5152 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5153 .opt_opc = vecop_list,
5154 .vece = MO_64 },
5155 };
5156 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5157 }
5158
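/*
 * UABD is the unsigned counterpart of SABD above, using unsigned
 * compares and umin/umax.
 */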
5159 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5160 {
5161 TCGv_i32 t = tcg_temp_new_i32();
5162
5163 tcg_gen_sub_i32(t, a, b);
5164 tcg_gen_sub_i32(d, b, a);
5165 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5166 tcg_temp_free_i32(t);
5167 }
5168
5169 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5170 {
5171 TCGv_i64 t = tcg_temp_new_i64();
5172
5173 tcg_gen_sub_i64(t, a, b);
5174 tcg_gen_sub_i64(d, b, a);
5175 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5176 tcg_temp_free_i64(t);
5177 }
5178
5179 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5180 {
5181 TCGv_vec t = tcg_temp_new_vec_matching(d);
5182
5183 tcg_gen_umin_vec(vece, t, a, b);
5184 tcg_gen_umax_vec(vece, d, a, b);
5185 tcg_gen_sub_vec(vece, d, d, t);
5186 tcg_temp_free_vec(t);
5187 }
5188
5189 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5190 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5191 {
5192 static const TCGOpcode vecop_list[] = {
5193 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5194 };
5195 static const GVecGen3 ops[4] = {
5196 { .fniv = gen_uabd_vec,
5197 .fno = gen_helper_gvec_uabd_b,
5198 .opt_opc = vecop_list,
5199 .vece = MO_8 },
5200 { .fniv = gen_uabd_vec,
5201 .fno = gen_helper_gvec_uabd_h,
5202 .opt_opc = vecop_list,
5203 .vece = MO_16 },
5204 { .fni4 = gen_uabd_i32,
5205 .fniv = gen_uabd_vec,
5206 .fno = gen_helper_gvec_uabd_s,
5207 .opt_opc = vecop_list,
5208 .vece = MO_32 },
5209 { .fni8 = gen_uabd_i64,
5210 .fniv = gen_uabd_vec,
5211 .fno = gen_helper_gvec_uabd_d,
5212 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5213 .opt_opc = vecop_list,
5214 .vece = MO_64 },
5215 };
5216 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5217 }
5218
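/*
 * SABA/UABA (absolute difference and accumulate): d = d + |a - b|,
 * built from the SABD/UABD helpers above plus an add.
 */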
5219 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5220 {
5221 TCGv_i32 t = tcg_temp_new_i32();
5222 gen_sabd_i32(t, a, b);
5223 tcg_gen_add_i32(d, d, t);
5224 tcg_temp_free_i32(t);
5225 }
5226
5227 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5228 {
5229 TCGv_i64 t = tcg_temp_new_i64();
5230 gen_sabd_i64(t, a, b);
5231 tcg_gen_add_i64(d, d, t);
5232 tcg_temp_free_i64(t);
5233 }
5234
5235 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5236 {
5237 TCGv_vec t = tcg_temp_new_vec_matching(d);
5238 gen_sabd_vec(vece, t, a, b);
5239 tcg_gen_add_vec(vece, d, d, t);
5240 tcg_temp_free_vec(t);
5241 }
5242
5243 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5244 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5245 {
5246 static const TCGOpcode vecop_list[] = {
5247 INDEX_op_sub_vec, INDEX_op_add_vec,
5248 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5249 };
5250 static const GVecGen3 ops[4] = {
5251 { .fniv = gen_saba_vec,
5252 .fno = gen_helper_gvec_saba_b,
5253 .opt_opc = vecop_list,
5254 .load_dest = true,
5255 .vece = MO_8 },
5256 { .fniv = gen_saba_vec,
5257 .fno = gen_helper_gvec_saba_h,
5258 .opt_opc = vecop_list,
5259 .load_dest = true,
5260 .vece = MO_16 },
5261 { .fni4 = gen_saba_i32,
5262 .fniv = gen_saba_vec,
5263 .fno = gen_helper_gvec_saba_s,
5264 .opt_opc = vecop_list,
5265 .load_dest = true,
5266 .vece = MO_32 },
5267 { .fni8 = gen_saba_i64,
5268 .fniv = gen_saba_vec,
5269 .fno = gen_helper_gvec_saba_d,
5270 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5271 .opt_opc = vecop_list,
5272 .load_dest = true,
5273 .vece = MO_64 },
5274 };
5275 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5276 }
5277
5278 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5279 {
5280 TCGv_i32 t = tcg_temp_new_i32();
5281 gen_uabd_i32(t, a, b);
5282 tcg_gen_add_i32(d, d, t);
5283 tcg_temp_free_i32(t);
5284 }
5285
5286 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5287 {
5288 TCGv_i64 t = tcg_temp_new_i64();
5289 gen_uabd_i64(t, a, b);
5290 tcg_gen_add_i64(d, d, t);
5291 tcg_temp_free_i64(t);
5292 }
5293
5294 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5295 {
5296 TCGv_vec t = tcg_temp_new_vec_matching(d);
5297 gen_uabd_vec(vece, t, a, b);
5298 tcg_gen_add_vec(vece, d, d, t);
5299 tcg_temp_free_vec(t);
5300 }
5301
5302 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5303 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5304 {
5305 static const TCGOpcode vecop_list[] = {
5306 INDEX_op_sub_vec, INDEX_op_add_vec,
5307 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5308 };
5309 static const GVecGen3 ops[4] = {
5310 { .fniv = gen_uaba_vec,
5311 .fno = gen_helper_gvec_uaba_b,
5312 .opt_opc = vecop_list,
5313 .load_dest = true,
5314 .vece = MO_8 },
5315 { .fniv = gen_uaba_vec,
5316 .fno = gen_helper_gvec_uaba_h,
5317 .opt_opc = vecop_list,
5318 .load_dest = true,
5319 .vece = MO_16 },
5320 { .fni4 = gen_uaba_i32,
5321 .fniv = gen_uaba_vec,
5322 .fno = gen_helper_gvec_uaba_s,
5323 .opt_opc = vecop_list,
5324 .load_dest = true,
5325 .vece = MO_32 },
5326 { .fni8 = gen_uaba_i64,
5327 .fniv = gen_uaba_vec,
5328 .fno = gen_helper_gvec_uaba_d,
5329 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5330 .opt_opc = vecop_list,
5331 .load_dest = true,
5332 .vece = MO_64 },
5333 };
5334 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5335 }
5336
5337 /* Translate a NEON data processing instruction. Return nonzero if the
5338 instruction is invalid.
5339 We process data in a mixture of 32-bit and 64-bit chunks.
5340 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5341
5342 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5343 {
5344 int op;
5345 int q;
5346 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5347 int size;
5348 int shift;
5349 int pass;
5350 int count;
5351 int u;
5352 int vec_size;
5353 uint32_t imm;
5354 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5355 TCGv_ptr ptr1, ptr2;
5356 TCGv_i64 tmp64;
5357
5358 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5359 return 1;
5360 }
5361
5362 /* FIXME: this access check should not take precedence over UNDEF
5363 * for invalid encodings; we will generate incorrect syndrome information
5364 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5365 */
5366 if (s->fp_excp_el) {
5367 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5368 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5369 return 0;
5370 }
5371
5372 if (!s->vfp_enabled)
5373 return 1;
5374 q = (insn & (1 << 6)) != 0;
5375 u = (insn >> 24) & 1;
5376 VFP_DREG_D(rd, insn);
5377 VFP_DREG_N(rn, insn);
5378 VFP_DREG_M(rm, insn);
5379 size = (insn >> 20) & 3;
5380 vec_size = q ? 16 : 8;
5381 rd_ofs = neon_reg_offset(rd, 0);
5382 rn_ofs = neon_reg_offset(rn, 0);
5383 rm_ofs = neon_reg_offset(rm, 0);
5384
5385 if ((insn & (1 << 23)) == 0) {
5386 /* Three register same length. */
5387 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5388 /* Catch invalid op and bad size combinations: UNDEF */
5389 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5390 return 1;
5391 }
5392 /* All insns of this form UNDEF for either this condition or the
5393 * superset of cases "Q==1"; we catch the latter later.
5394 */
5395 if (q && ((rd | rn | rm) & 1)) {
5396 return 1;
5397 }
5398 switch (op) {
5399 case NEON_3R_VFM_VQRDMLSH:
5400 if (!u) {
5401 /* VFM, VFMS */
5402 if (size == 1) {
5403 return 1;
5404 }
5405 break;
5406 }
5407 /* VQRDMLSH : handled by decodetree */
5408 return 1;
5409
5410 case NEON_3R_VADD_VSUB:
5411 case NEON_3R_LOGIC:
5412 case NEON_3R_VMAX:
5413 case NEON_3R_VMIN:
5414 case NEON_3R_VTST_VCEQ:
5415 case NEON_3R_VCGT:
5416 case NEON_3R_VCGE:
5417 case NEON_3R_VQADD:
5418 case NEON_3R_VQSUB:
5419 case NEON_3R_VMUL:
5420 case NEON_3R_VML:
5421 case NEON_3R_VSHL:
5422 case NEON_3R_SHA:
5423 case NEON_3R_VHADD:
5424 case NEON_3R_VRHADD:
5425 case NEON_3R_VHSUB:
5426 case NEON_3R_VABD:
5427 case NEON_3R_VABA:
5428 case NEON_3R_VQSHL:
5429 case NEON_3R_VRSHL:
5430 case NEON_3R_VQRSHL:
5431 case NEON_3R_VPMAX:
5432 case NEON_3R_VPMIN:
5433 case NEON_3R_VPADD_VQRDMLAH:
5434 case NEON_3R_VQDMULH_VQRDMULH:
5435 case NEON_3R_FLOAT_ARITH:
5436 case NEON_3R_FLOAT_MULTIPLY:
5437 case NEON_3R_FLOAT_CMP:
5438 case NEON_3R_FLOAT_ACMP:
5439 /* Already handled by decodetree */
5440 return 1;
5441 }
5442
5443 if (size == 3) {
5444 /* 64-bit element instructions: handled by decodetree */
5445 return 1;
5446 }
5447 switch (op) {
5448 case NEON_3R_FLOAT_MINMAX:
5449 if (u) {
5450 return 1; /* VPMIN/VPMAX handled by decodetree */
5451 }
5452 break;
5453 case NEON_3R_FLOAT_MISC:
5454 /* VMAXNM/VMINNM in ARMv8 */
5455 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5456 return 1;
5457 }
5458 break;
5459 case NEON_3R_VFM_VQRDMLSH:
5460 if (!dc_isar_feature(aa32_simdfmac, s)) {
5461 return 1;
5462 }
5463 break;
5464 default:
5465 break;
5466 }
5467
5468 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5469
5470 /* Elementwise. */
5471 tmp = neon_load_reg(rn, pass);
5472 tmp2 = neon_load_reg(rm, pass);
5473 switch (op) {
5474 case NEON_3R_FLOAT_MINMAX:
5475 {
5476 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5477 if (size == 0) {
5478 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5479 } else {
5480 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5481 }
5482 tcg_temp_free_ptr(fpstatus);
5483 break;
5484 }
5485 case NEON_3R_FLOAT_MISC:
5486 if (u) {
5487 /* VMAXNM/VMINNM */
5488 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5489 if (size == 0) {
5490 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5491 } else {
5492 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5493 }
5494 tcg_temp_free_ptr(fpstatus);
5495 } else {
5496 if (size == 0) {
5497 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5498 } else {
5499 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5500 }
5501 }
5502 break;
5503 case NEON_3R_VFM_VQRDMLSH:
5504 {
5505 /* VFMA, VFMS: fused multiply-add */
5506 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5507 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5508 if (size) {
5509 /* VFMS */
5510 gen_helper_vfp_negs(tmp, tmp);
5511 }
5512 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5513 tcg_temp_free_i32(tmp3);
5514 tcg_temp_free_ptr(fpstatus);
5515 break;
5516 }
5517 default:
5518 abort();
5519 }
5520 tcg_temp_free_i32(tmp2);
5521
5522 neon_store_reg(rd, pass, tmp);
5523
5524 } /* for pass */
5525 /* End of 3 register same size operations. */
5526 } else if (insn & (1 << 4)) {
5527 if ((insn & 0x00380080) != 0) {
5528 /* Two registers and shift. */
5529 op = (insn >> 8) & 0xf;
5530 if (insn & (1 << 7)) {
5531 /* 64-bit shift. */
5532 if (op > 7) {
5533 return 1;
5534 }
5535 size = 3;
5536 } else {
5537 size = 2;
5538 while ((insn & (1 << (size + 19))) == 0)
5539 size--;
5540 }
5541 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5542 if (op < 8) {
5543 /* Shift by immediate:
5544 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5545 if (q && ((rd | rm) & 1)) {
5546 return 1;
5547 }
5548 if (!u && (op == 4 || op == 6)) {
5549 return 1;
5550 }
5551 /* Right shifts are encoded as N - shift, where N is the
5552 element size in bits. */
5553 if (op <= 4) {
5554 shift = shift - (1 << (size + 3));
5555 }
5556
5557 switch (op) {
5558 case 0: /* VSHR */
5559 /* Right shift comes here negative. */
5560 shift = -shift;
5561 /* Shifts larger than the element size are architecturally
5562 * valid. Unsigned results in all zeros; signed results
5563 * in all sign bits.
5564 */
5565 if (!u) {
5566 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5567 MIN(shift, (8 << size) - 1),
5568 vec_size, vec_size);
5569 } else if (shift >= 8 << size) {
5570 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5571 vec_size, 0);
5572 } else {
5573 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5574 vec_size, vec_size);
5575 }
5576 return 0;
5577
5578 case 1: /* VSRA */
5579 /* Right shift comes here negative. */
5580 shift = -shift;
5581 if (u) {
5582 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5583 vec_size, vec_size);
5584 } else {
5585 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5586 vec_size, vec_size);
5587 }
5588 return 0;
5589
5590 case 2: /* VRSHR */
5591 /* Right shift comes here negative. */
5592 shift = -shift;
5593 if (u) {
5594 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5595 vec_size, vec_size);
5596 } else {
5597 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5598 vec_size, vec_size);
5599 }
5600 return 0;
5601
5602 case 3: /* VRSRA */
5603 /* Right shift comes here negative. */
5604 shift = -shift;
5605 if (u) {
5606 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5607 vec_size, vec_size);
5608 } else {
5609 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5610 vec_size, vec_size);
5611 }
5612 return 0;
5613
5614 case 4: /* VSRI */
5615 if (!u) {
5616 return 1;
5617 }
5618 /* Right shift comes here negative. */
5619 shift = -shift;
5620 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5621 vec_size, vec_size);
5622 return 0;
5623
5624 case 5: /* VSHL, VSLI */
5625 if (u) { /* VSLI */
5626 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5627 vec_size, vec_size);
5628 } else { /* VSHL */
5629 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5630 vec_size, vec_size);
5631 }
5632 return 0;
5633 }
5634
5635 if (size == 3) {
5636 count = q + 1;
5637 } else {
5638 count = q ? 4 : 2;
5639 }
5640
5641 /* To avoid excessive duplication of ops we implement shift
5642 * by immediate using the variable shift operations.
5643 */
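/* As a concrete sketch: for size == 0 (bytes), dup_const replicates
 * the shift count into every byte lane (a count of 3 becomes
 * 0x03030303), so each lane of the variable-shift helper sees the
 * same immediate.
 */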
5644 imm = dup_const(size, shift);
5645
5646 for (pass = 0; pass < count; pass++) {
5647 if (size == 3) {
5648 neon_load_reg64(cpu_V0, rm + pass);
5649 tcg_gen_movi_i64(cpu_V1, imm);
5650 switch (op) {
5651 case 6: /* VQSHLU */
5652 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5653 cpu_V0, cpu_V1);
5654 break;
5655 case 7: /* VQSHL */
5656 if (u) {
5657 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5658 cpu_V0, cpu_V1);
5659 } else {
5660 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5661 cpu_V0, cpu_V1);
5662 }
5663 break;
5664 default:
5665 g_assert_not_reached();
5666 }
5667 neon_store_reg64(cpu_V0, rd + pass);
5668 } else { /* size < 3 */
5669 /* Operands: value in tmp, shift immediate in tmp2. */
5670 tmp = neon_load_reg(rm, pass);
5671 tmp2 = tcg_temp_new_i32();
5672 tcg_gen_movi_i32(tmp2, imm);
5673 switch (op) {
5674 case 6: /* VQSHLU */
5675 switch (size) {
5676 case 0:
5677 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5678 tmp, tmp2);
5679 break;
5680 case 1:
5681 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5682 tmp, tmp2);
5683 break;
5684 case 2:
5685 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5686 tmp, tmp2);
5687 break;
5688 default:
5689 abort();
5690 }
5691 break;
5692 case 7: /* VQSHL */
5693 GEN_NEON_INTEGER_OP_ENV(qshl);
5694 break;
5695 default:
5696 g_assert_not_reached();
5697 }
5698 tcg_temp_free_i32(tmp2);
5699 neon_store_reg(rd, pass, tmp);
5700 }
5701 } /* for pass */
5702 } else if (op < 10) {
5703 /* Shift by immediate and narrow:
5704 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5705 int input_unsigned = (op == 8) ? !u : u;
5706 if (rm & 1) {
5707 return 1;
5708 }
5709 shift = shift - (1 << (size + 3));
5710 size++;
5711 if (size == 3) {
5712 tmp64 = tcg_const_i64(shift);
5713 neon_load_reg64(cpu_V0, rm);
5714 neon_load_reg64(cpu_V1, rm + 1);
5715 for (pass = 0; pass < 2; pass++) {
5716 TCGv_i64 in;
5717 if (pass == 0) {
5718 in = cpu_V0;
5719 } else {
5720 in = cpu_V1;
5721 }
5722 if (q) {
5723 if (input_unsigned) {
5724 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5725 } else {
5726 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5727 }
5728 } else {
5729 if (input_unsigned) {
5730 gen_ushl_i64(cpu_V0, in, tmp64);
5731 } else {
5732 gen_sshl_i64(cpu_V0, in, tmp64);
5733 }
5734 }
5735 tmp = tcg_temp_new_i32();
5736 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5737 neon_store_reg(rd, pass, tmp);
5738 } /* for pass */
5739 tcg_temp_free_i64(tmp64);
5740 } else {
5741 if (size == 1) {
5742 imm = (uint16_t)shift;
5743 imm |= imm << 16;
5744 } else {
5745 /* size == 2 */
5746 imm = (uint32_t)shift;
5747 }
5748 tmp2 = tcg_const_i32(imm);
5749 tmp4 = neon_load_reg(rm + 1, 0);
5750 tmp5 = neon_load_reg(rm + 1, 1);
5751 for (pass = 0; pass < 2; pass++) {
5752 if (pass == 0) {
5753 tmp = neon_load_reg(rm, 0);
5754 } else {
5755 tmp = tmp4;
5756 }
5757 gen_neon_shift_narrow(size, tmp, tmp2, q,
5758 input_unsigned);
5759 if (pass == 0) {
5760 tmp3 = neon_load_reg(rm, 1);
5761 } else {
5762 tmp3 = tmp5;
5763 }
5764 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5765 input_unsigned);
5766 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5767 tcg_temp_free_i32(tmp);
5768 tcg_temp_free_i32(tmp3);
5769 tmp = tcg_temp_new_i32();
5770 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5771 neon_store_reg(rd, pass, tmp);
5772 } /* for pass */
5773 tcg_temp_free_i32(tmp2);
5774 }
5775 } else if (op == 10) {
5776 /* VSHLL, VMOVL */
5777 if (q || (rd & 1)) {
5778 return 1;
5779 }
5780 tmp = neon_load_reg(rm, 0);
5781 tmp2 = neon_load_reg(rm, 1);
5782 for (pass = 0; pass < 2; pass++) {
5783 if (pass == 1)
5784 tmp = tmp2;
5785
5786 gen_neon_widen(cpu_V0, tmp, size, u);
5787
5788 if (shift != 0) {
5789 /* The shift is less than the width of the source
5790 type, so we can just shift the whole register. */
5791 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5792 /* Widen the result of the shift: we need to clear
5793 * the potential overflow bits resulting from
5794 * left bits of the narrow input appearing as
5795 * right bits of the left-neighbouring narrow
5796 * input. */
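/* For example, VSHLL.S8 with shift == 3: imm becomes
 * 0xff >> 5 == 0x07, replicated to 0x0007000700070007, and the
 * AND below clears the three low bits of each 16-bit lane, which
 * after the whole-register shift hold bits spilled in from the
 * lane below.
 */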
5797 if (size < 2 || !u) {
5798 uint64_t imm64;
5799 if (size == 0) {
5800 imm = (0xffu >> (8 - shift));
5801 imm |= imm << 16;
5802 } else if (size == 1) {
5803 imm = 0xffff >> (16 - shift);
5804 } else {
5805 /* size == 2 */
5806 imm = 0xffffffff >> (32 - shift);
5807 }
5808 if (size < 2) {
5809 imm64 = imm | (((uint64_t)imm) << 32);
5810 } else {
5811 imm64 = imm;
5812 }
5813 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5814 }
5815 }
5816 neon_store_reg64(cpu_V0, rd + pass);
5817 }
5818 } else if (op >= 14) {
5819 /* VCVT fixed-point. */
5820 TCGv_ptr fpst;
5821 TCGv_i32 shiftv;
5822 VFPGenFixPointFn *fn;
5823
5824 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5825 return 1;
5826 }
5827
5828 if (!(op & 1)) {
5829 if (u) {
5830 fn = gen_helper_vfp_ultos;
5831 } else {
5832 fn = gen_helper_vfp_sltos;
5833 }
5834 } else {
5835 if (u) {
5836 fn = gen_helper_vfp_touls_round_to_zero;
5837 } else {
5838 fn = gen_helper_vfp_tosls_round_to_zero;
5839 }
5840 }
5841
5842 /* We have already masked out the must-be-1 top bit of imm6,
5843 * hence this 32-shift where the ARM ARM has 64-imm6.
5844 */
5845 shift = 32 - shift;
5846 fpst = get_fpstatus_ptr(1);
5847 shiftv = tcg_const_i32(shift);
5848 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5849 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5850 fn(tmpf, tmpf, shiftv, fpst);
5851 neon_store_reg(rd, pass, tmpf);
5852 }
5853 tcg_temp_free_ptr(fpst);
5854 tcg_temp_free_i32(shiftv);
5855 } else {
5856 return 1;
5857 }
5858 } else { /* (insn & 0x00380080) == 0 */
5859 int invert, reg_ofs, vec_size;
5860
5861 if (q && (rd & 1)) {
5862 return 1;
5863 }
5864
5865 op = (insn >> 8) & 0xf;
5866 /* One register and immediate. */
5867 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5868 invert = (insn & (1 << 5)) != 0;
5869 /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm = 0 is UNPREDICTABLE.
5870 * We choose not to special-case this and will behave as if a
5871 * valid constant encoding of 0 had been given.
5872 */
5873 switch (op) {
5874 case 0: case 1:
5875 /* no-op */
5876 break;
5877 case 2: case 3:
5878 imm <<= 8;
5879 break;
5880 case 4: case 5:
5881 imm <<= 16;
5882 break;
5883 case 6: case 7:
5884 imm <<= 24;
5885 break;
5886 case 8: case 9:
5887 imm |= imm << 16;
5888 break;
5889 case 10: case 11:
5890 imm = (imm << 8) | (imm << 24);
5891 break;
5892 case 12:
5893 imm = (imm << 8) | 0xff;
5894 break;
5895 case 13:
5896 imm = (imm << 16) | 0xffff;
5897 break;
5898 case 14:
5899 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5900 if (invert) {
5901 imm = ~imm;
5902 }
5903 break;
5904 case 15:
5905 if (invert) {
5906 return 1;
5907 }
5908 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5909 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5910 break;
5911 }
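/* imm now holds the expanded 32-bit AdvSIMD constant before any
 * inversion; e.g. op == 12 with an 8-bit value of 0x12 expands
 * to 0x12ff.
 */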
5912 if (invert) {
5913 imm = ~imm;
5914 }
5915
5916 reg_ofs = neon_reg_offset(rd, 0);
5917 vec_size = q ? 16 : 8;
5918
5919 if (op & 1 && op < 12) {
5920 if (invert) {
5921 /* The immediate value has already been inverted,
5922 * so BIC becomes AND.
5923 */
5924 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5925 vec_size, vec_size);
5926 } else {
5927 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5928 vec_size, vec_size);
5929 }
5930 } else {
5931 /* VMOV, VMVN. */
5932 if (op == 14 && invert) {
5933 TCGv_i64 t64 = tcg_temp_new_i64();
5934
5935 for (pass = 0; pass <= q; ++pass) {
5936 uint64_t val = 0;
5937 int n;
5938
5939 for (n = 0; n < 8; n++) {
5940 if (imm & (1 << (n + pass * 8))) {
5941 val |= 0xffull << (n * 8);
5942 }
5943 }
5944 tcg_gen_movi_i64(t64, val);
5945 neon_store_reg64(t64, rd + pass);
5946 }
5947 tcg_temp_free_i64(t64);
5948 } else {
5949 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
5950 vec_size, imm);
5951 }
5952 }
5953 }
5954 } else { /* (insn & 0x00800010 == 0x00800000) */
5955 if (size != 3) {
5956 op = (insn >> 8) & 0xf;
5957 if ((insn & (1 << 6)) == 0) {
5958 /* Three registers of different lengths. */
5959 int src1_wide;
5960 int src2_wide;
5961 int prewiden;
5962 /* undefreq: bit 0 : UNDEF if size == 0
5963 * bit 1 : UNDEF if size == 1
5964 * bit 2 : UNDEF if size == 2
5965 * bit 3 : UNDEF if U == 1
5966 * Note that [2:0] set implies 'always UNDEF'
5967 */
5968 int undefreq;
5969 /* prewiden, src1_wide, src2_wide, undefreq */
5970 static const int neon_3reg_wide[16][4] = {
5971 {1, 0, 0, 0}, /* VADDL */
5972 {1, 1, 0, 0}, /* VADDW */
5973 {1, 0, 0, 0}, /* VSUBL */
5974 {1, 1, 0, 0}, /* VSUBW */
5975 {0, 1, 1, 0}, /* VADDHN */
5976 {0, 0, 0, 0}, /* VABAL */
5977 {0, 1, 1, 0}, /* VSUBHN */
5978 {0, 0, 0, 0}, /* VABDL */
5979 {0, 0, 0, 0}, /* VMLAL */
5980 {0, 0, 0, 9}, /* VQDMLAL */
5981 {0, 0, 0, 0}, /* VMLSL */
5982 {0, 0, 0, 9}, /* VQDMLSL */
5983 {0, 0, 0, 0}, /* Integer VMULL */
5984 {0, 0, 0, 9}, /* VQDMULL */
5985 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5986 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5987 };
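/* For example, {0, 0, 0, 9} for VQDMLAL means undefreq == 9,
 * i.e. UNDEF if size == 0 (bit 0) or if U == 1 (bit 3).
 */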
5988
5989 prewiden = neon_3reg_wide[op][0];
5990 src1_wide = neon_3reg_wide[op][1];
5991 src2_wide = neon_3reg_wide[op][2];
5992 undefreq = neon_3reg_wide[op][3];
5993
5994 if ((undefreq & (1 << size)) ||
5995 ((undefreq & 8) && u)) {
5996 return 1;
5997 }
5998 if ((src1_wide && (rn & 1)) ||
5999 (src2_wide && (rm & 1)) ||
6000 (!src2_wide && (rd & 1))) {
6001 return 1;
6002 }
6003
6004 /* Handle polynomial VMULL in a single pass. */
6005 if (op == 14) {
6006 if (size == 0) {
6007 /* VMULL.P8 */
6008 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6009 0, gen_helper_neon_pmull_h);
6010 } else {
6011 /* VMULL.P64 */
6012 if (!dc_isar_feature(aa32_pmull, s)) {
6013 return 1;
6014 }
6015 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6016 0, gen_helper_gvec_pmull_q);
6017 }
6018 return 0;
6019 }
6020
6021 /* Avoid overlapping operands. Wide source operands are
6022 always aligned so will never overlap with wide
6023 destinations in problematic ways. */
6024 if (rd == rm && !src2_wide) {
6025 tmp = neon_load_reg(rm, 1);
6026 neon_store_scratch(2, tmp);
6027 } else if (rd == rn && !src1_wide) {
6028 tmp = neon_load_reg(rn, 1);
6029 neon_store_scratch(2, tmp);
6030 }
6031 tmp3 = NULL;
6032 for (pass = 0; pass < 2; pass++) {
6033 if (src1_wide) {
6034 neon_load_reg64(cpu_V0, rn + pass);
6035 tmp = NULL;
6036 } else {
6037 if (pass == 1 && rd == rn) {
6038 tmp = neon_load_scratch(2);
6039 } else {
6040 tmp = neon_load_reg(rn, pass);
6041 }
6042 if (prewiden) {
6043 gen_neon_widen(cpu_V0, tmp, size, u);
6044 }
6045 }
6046 if (src2_wide) {
6047 neon_load_reg64(cpu_V1, rm + pass);
6048 tmp2 = NULL;
6049 } else {
6050 if (pass == 1 && rd == rm) {
6051 tmp2 = neon_load_scratch(2);
6052 } else {
6053 tmp2 = neon_load_reg(rm, pass);
6054 }
6055 if (prewiden) {
6056 gen_neon_widen(cpu_V1, tmp2, size, u);
6057 }
6058 }
6059 switch (op) {
6060 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6061 gen_neon_addl(size);
6062 break;
6063 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6064 gen_neon_subl(size);
6065 break;
6066 case 5: case 7: /* VABAL, VABDL */
6067 switch ((size << 1) | u) {
6068 case 0:
6069 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6070 break;
6071 case 1:
6072 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6073 break;
6074 case 2:
6075 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6076 break;
6077 case 3:
6078 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6079 break;
6080 case 4:
6081 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6082 break;
6083 case 5:
6084 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6085 break;
6086 default: abort();
6087 }
6088 tcg_temp_free_i32(tmp2);
6089 tcg_temp_free_i32(tmp);
6090 break;
6091 case 8: case 9: case 10: case 11: case 12: case 13:
6092 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6093 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6094 break;
6095 default: /* 15 is RESERVED: caught earlier */
6096 abort();
6097 }
6098 if (op == 13) {
6099 /* VQDMULL */
6100 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6101 neon_store_reg64(cpu_V0, rd + pass);
6102 } else if (op == 5 || (op >= 8 && op <= 11)) {
6103 /* Accumulate. */
6104 neon_load_reg64(cpu_V1, rd + pass);
6105 switch (op) {
6106 case 10: /* VMLSL */
6107 gen_neon_negl(cpu_V0, size);
6108 /* Fall through */
6109 case 5: case 8: /* VABAL, VMLAL */
6110 gen_neon_addl(size);
6111 break;
6112 case 9: case 11: /* VQDMLAL, VQDMLSL */
6113 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6114 if (op == 11) {
6115 gen_neon_negl(cpu_V0, size);
6116 }
6117 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6118 break;
6119 default:
6120 abort();
6121 }
6122 neon_store_reg64(cpu_V0, rd + pass);
6123 } else if (op == 4 || op == 6) {
6124 /* Narrowing operation. */
6125 tmp = tcg_temp_new_i32();
6126 if (!u) {
6127 switch (size) {
6128 case 0:
6129 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6130 break;
6131 case 1:
6132 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6133 break;
6134 case 2:
6135 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6136 break;
6137 default: abort();
6138 }
6139 } else {
6140 switch (size) {
6141 case 0:
6142 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6143 break;
6144 case 1:
6145 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6146 break;
6147 case 2:
6148 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6149 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6150 break;
6151 default: abort();
6152 }
6153 }
6154 if (pass == 0) {
6155 tmp3 = tmp;
6156 } else {
6157 neon_store_reg(rd, 0, tmp3);
6158 neon_store_reg(rd, 1, tmp);
6159 }
6160 } else {
6161 /* Write back the result. */
6162 neon_store_reg64(cpu_V0, rd + pass);
6163 }
6164 }
6165 } else {
6166 /* Two registers and a scalar. NB that for ops of this form
6167 * the ARM ARM labels bit 24 as Q, but it is in our variable
6168 * 'u', not 'q'.
6169 */
6170 if (size == 0) {
6171 return 1;
6172 }
6173 switch (op) {
6174 case 1: /* Floating point VMLA scalar */
6175 case 5: /* Floating point VMLS scalar */
6176 case 9: /* Floating point VMUL scalar */
6177 if (size == 1) {
6178 return 1;
6179 }
6180 /* fall through */
6181 case 0: /* Integer VMLA scalar */
6182 case 4: /* Integer VMLS scalar */
6183 case 8: /* Integer VMUL scalar */
6184 case 12: /* VQDMULH scalar */
6185 case 13: /* VQRDMULH scalar */
6186 if (u && ((rd | rn) & 1)) {
6187 return 1;
6188 }
6189 tmp = neon_get_scalar(size, rm);
6190 neon_store_scratch(0, tmp);
6191 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6192 tmp = neon_load_scratch(0);
6193 tmp2 = neon_load_reg(rn, pass);
6194 if (op == 12) {
6195 if (size == 1) {
6196 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6197 } else {
6198 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6199 }
6200 } else if (op == 13) {
6201 if (size == 1) {
6202 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6203 } else {
6204 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6205 }
6206 } else if (op & 1) {
6207 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6208 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6209 tcg_temp_free_ptr(fpstatus);
6210 } else {
6211 switch (size) {
6212 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6213 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6214 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6215 default: abort();
6216 }
6217 }
6218 tcg_temp_free_i32(tmp2);
6219 if (op < 8) {
6220 /* Accumulate. */
6221 tmp2 = neon_load_reg(rd, pass);
6222 switch (op) {
6223 case 0:
6224 gen_neon_add(size, tmp, tmp2);
6225 break;
6226 case 1:
6227 {
6228 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6229 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6230 tcg_temp_free_ptr(fpstatus);
6231 break;
6232 }
6233 case 4:
6234 gen_neon_rsb(size, tmp, tmp2);
6235 break;
6236 case 5:
6237 {
6238 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6239 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6240 tcg_temp_free_ptr(fpstatus);
6241 break;
6242 }
6243 default:
6244 abort();
6245 }
6246 tcg_temp_free_i32(tmp2);
6247 }
6248 neon_store_reg(rd, pass, tmp);
6249 }
6250 break;
6251 case 3: /* VQDMLAL scalar */
6252 case 7: /* VQDMLSL scalar */
6253 case 11: /* VQDMULL scalar */
6254 if (u == 1) {
6255 return 1;
6256 }
6257 /* fall through */
6258 case 2: /* VMLAL scalar */
6259 case 6: /* VMLSL scalar */
6260 case 10: /* VMULL scalar */
6261 if (rd & 1) {
6262 return 1;
6263 }
6264 tmp2 = neon_get_scalar(size, rm);
6265 /* We need a copy of tmp2 because gen_neon_mull
6266 * frees it during pass 0. */
6267 tmp4 = tcg_temp_new_i32();
6268 tcg_gen_mov_i32(tmp4, tmp2);
6269 tmp3 = neon_load_reg(rn, 1);
6270
6271 for (pass = 0; pass < 2; pass++) {
6272 if (pass == 0) {
6273 tmp = neon_load_reg(rn, 0);
6274 } else {
6275 tmp = tmp3;
6276 tmp2 = tmp4;
6277 }
6278 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6279 if (op != 11) {
6280 neon_load_reg64(cpu_V1, rd + pass);
6281 }
6282 switch (op) {
6283 case 6:
6284 gen_neon_negl(cpu_V0, size);
6285 /* Fall through */
6286 case 2:
6287 gen_neon_addl(size);
6288 break;
6289 case 3: case 7:
6290 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6291 if (op == 7) {
6292 gen_neon_negl(cpu_V0, size);
6293 }
6294 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6295 break;
6296 case 10:
6297 /* no-op */
6298 break;
6299 case 11:
6300 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6301 break;
6302 default:
6303 abort();
6304 }
6305 neon_store_reg64(cpu_V0, rd + pass);
6306 }
6307 break;
6308 case 14: /* VQRDMLAH scalar */
6309 case 15: /* VQRDMLSH scalar */
6310 {
6311 NeonGenThreeOpEnvFn *fn;
6312
6313 if (!dc_isar_feature(aa32_rdm, s)) {
6314 return 1;
6315 }
6316 if (u && ((rd | rn) & 1)) {
6317 return 1;
6318 }
6319 if (op == 14) {
6320 if (size == 1) {
6321 fn = gen_helper_neon_qrdmlah_s16;
6322 } else {
6323 fn = gen_helper_neon_qrdmlah_s32;
6324 }
6325 } else {
6326 if (size == 1) {
6327 fn = gen_helper_neon_qrdmlsh_s16;
6328 } else {
6329 fn = gen_helper_neon_qrdmlsh_s32;
6330 }
6331 }
6332
6333 tmp2 = neon_get_scalar(size, rm);
6334 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6335 tmp = neon_load_reg(rn, pass);
6336 tmp3 = neon_load_reg(rd, pass);
6337 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6338 tcg_temp_free_i32(tmp3);
6339 neon_store_reg(rd, pass, tmp);
6340 }
6341 tcg_temp_free_i32(tmp2);
6342 }
6343 break;
6344 default:
6345 g_assert_not_reached();
6346 }
6347 }
6348 } else { /* size == 3 */
6349 if (!u) {
6350 /* Extract. */
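/* VEXT: the result starts at byte offset imm of the concatenation
 * of Vn (low-order bytes) and Vm (high-order bytes).
 */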
6351 imm = (insn >> 8) & 0xf;
6352
6353 if (imm > 7 && !q)
6354 return 1;
6355
6356 if (q && ((rd | rn | rm) & 1)) {
6357 return 1;
6358 }
6359
6360 if (imm == 0) {
6361 neon_load_reg64(cpu_V0, rn);
6362 if (q) {
6363 neon_load_reg64(cpu_V1, rn + 1);
6364 }
6365 } else if (imm == 8) {
6366 neon_load_reg64(cpu_V0, rn + 1);
6367 if (q) {
6368 neon_load_reg64(cpu_V1, rm);
6369 }
6370 } else if (q) {
6371 tmp64 = tcg_temp_new_i64();
6372 if (imm < 8) {
6373 neon_load_reg64(cpu_V0, rn);
6374 neon_load_reg64(tmp64, rn + 1);
6375 } else {
6376 neon_load_reg64(cpu_V0, rn + 1);
6377 neon_load_reg64(tmp64, rm);
6378 }
6379 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6380 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6381 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6382 if (imm < 8) {
6383 neon_load_reg64(cpu_V1, rm);
6384 } else {
6385 neon_load_reg64(cpu_V1, rm + 1);
6386 imm -= 8;
6387 }
6388 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6389 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6390 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6391 tcg_temp_free_i64(tmp64);
6392 } else {
6393 /* Non-quadword VEXT: combine Dn and Dm directly. */
6394 neon_load_reg64(cpu_V0, rn);
6395 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6396 neon_load_reg64(cpu_V1, rm);
6397 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6398 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6399 }
6400 neon_store_reg64(cpu_V0, rd);
6401 if (q) {
6402 neon_store_reg64(cpu_V1, rd + 1);
6403 }
6404 } else if ((insn & (1 << 11)) == 0) {
6405 /* Two register misc. */
6406 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6407 size = (insn >> 18) & 3;
6408 /* UNDEF for unknown op values and bad op-size combinations */
6409 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6410 return 1;
6411 }
6412 if (neon_2rm_is_v8_op(op) &&
6413 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6414 return 1;
6415 }
6416 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6417 q && ((rm | rd) & 1)) {
6418 return 1;
6419 }
6420 switch (op) {
6421 case NEON_2RM_VREV64:
6422 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6423 tmp = neon_load_reg(rm, pass * 2);
6424 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6425 switch (size) {
6426 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6427 case 1: gen_swap_half(tmp); break;
6428 case 2: /* no-op */ break;
6429 default: abort();
6430 }
6431 neon_store_reg(rd, pass * 2 + 1, tmp);
6432 if (size == 2) {
6433 neon_store_reg(rd, pass * 2, tmp2);
6434 } else {
6435 switch (size) {
6436 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6437 case 1: gen_swap_half(tmp2); break;
6438 default: abort();
6439 }
6440 neon_store_reg(rd, pass * 2, tmp2);
6441 }
6442 }
6443 break;
6444 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6445 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6446 for (pass = 0; pass < q + 1; pass++) {
6447 tmp = neon_load_reg(rm, pass * 2);
6448 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6449 tmp = neon_load_reg(rm, pass * 2 + 1);
6450 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6451 switch (size) {
6452 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6453 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6454 case 2: tcg_gen_add_i64(CPU_V001); break;
6455 default: abort();
6456 }
6457 if (op >= NEON_2RM_VPADAL) {
6458 /* Accumulate. */
6459 neon_load_reg64(cpu_V1, rd + pass);
6460 gen_neon_addl(size);
6461 }
6462 neon_store_reg64(cpu_V0, rd + pass);
6463 }
6464 break;
6465 case NEON_2RM_VTRN:
6466 if (size == 2) {
6467 int n;
6468 for (n = 0; n < (q ? 4 : 2); n += 2) {
6469 tmp = neon_load_reg(rm, n);
6470 tmp2 = neon_load_reg(rd, n + 1);
6471 neon_store_reg(rm, n, tmp2);
6472 neon_store_reg(rd, n + 1, tmp);
6473 }
6474 } else {
6475 goto elementwise;
6476 }
6477 break;
6478 case NEON_2RM_VUZP:
6479 if (gen_neon_unzip(rd, rm, size, q)) {
6480 return 1;
6481 }
6482 break;
6483 case NEON_2RM_VZIP:
6484 if (gen_neon_zip(rd, rm, size, q)) {
6485 return 1;
6486 }
6487 break;
6488 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6489 /* also VQMOVUN; op field and mnemonics don't line up */
6490 if (rm & 1) {
6491 return 1;
6492 }
6493 tmp2 = NULL;
6494 for (pass = 0; pass < 2; pass++) {
6495 neon_load_reg64(cpu_V0, rm + pass);
6496 tmp = tcg_temp_new_i32();
6497 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6498 tmp, cpu_V0);
6499 if (pass == 0) {
6500 tmp2 = tmp;
6501 } else {
6502 neon_store_reg(rd, 0, tmp2);
6503 neon_store_reg(rd, 1, tmp);
6504 }
6505 }
6506 break;
6507 case NEON_2RM_VSHLL:
6508 if (q || (rd & 1)) {
6509 return 1;
6510 }
6511 tmp = neon_load_reg(rm, 0);
6512 tmp2 = neon_load_reg(rm, 1);
6513 for (pass = 0; pass < 2; pass++) {
6514 if (pass == 1)
6515 tmp = tmp2;
6516 gen_neon_widen(cpu_V0, tmp, size, 1);
6517 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6518 neon_store_reg64(cpu_V0, rd + pass);
6519 }
6520 break;
6521 case NEON_2RM_VCVT_F16_F32:
6522 {
6523 TCGv_ptr fpst;
6524 TCGv_i32 ahp;
6525
6526 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6527 q || (rm & 1)) {
6528 return 1;
6529 }
6530 fpst = get_fpstatus_ptr(true);
6531 ahp = get_ahp_flag();
6532 tmp = neon_load_reg(rm, 0);
6533 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6534 tmp2 = neon_load_reg(rm, 1);
6535 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6536 tcg_gen_shli_i32(tmp2, tmp2, 16);
6537 tcg_gen_or_i32(tmp2, tmp2, tmp);
6538 tcg_temp_free_i32(tmp);
6539 tmp = neon_load_reg(rm, 2);
6540 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6541 tmp3 = neon_load_reg(rm, 3);
6542 neon_store_reg(rd, 0, tmp2);
6543 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6544 tcg_gen_shli_i32(tmp3, tmp3, 16);
6545 tcg_gen_or_i32(tmp3, tmp3, tmp);
6546 neon_store_reg(rd, 1, tmp3);
6547 tcg_temp_free_i32(tmp);
6548 tcg_temp_free_i32(ahp);
6549 tcg_temp_free_ptr(fpst);
6550 break;
6551 }
6552 case NEON_2RM_VCVT_F32_F16:
6553 {
6554 TCGv_ptr fpst;
6555 TCGv_i32 ahp;
6556 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6557 q || (rd & 1)) {
6558 return 1;
6559 }
6560 fpst = get_fpstatus_ptr(true);
6561 ahp = get_ahp_flag();
6562 tmp3 = tcg_temp_new_i32();
6563 tmp = neon_load_reg(rm, 0);
6564 tmp2 = neon_load_reg(rm, 1);
6565 tcg_gen_ext16u_i32(tmp3, tmp);
6566 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6567 neon_store_reg(rd, 0, tmp3);
6568 tcg_gen_shri_i32(tmp, tmp, 16);
6569 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6570 neon_store_reg(rd, 1, tmp);
6571 tmp3 = tcg_temp_new_i32();
6572 tcg_gen_ext16u_i32(tmp3, tmp2);
6573 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6574 neon_store_reg(rd, 2, tmp3);
6575 tcg_gen_shri_i32(tmp2, tmp2, 16);
6576 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6577 neon_store_reg(rd, 3, tmp2);
6578 tcg_temp_free_i32(ahp);
6579 tcg_temp_free_ptr(fpst);
6580 break;
6581 }
6582 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6583 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6584 return 1;
6585 }
6586 ptr1 = vfp_reg_ptr(true, rd);
6587 ptr2 = vfp_reg_ptr(true, rm);
6588
6589 /* Bit 6 is the lowest opcode bit; it distinguishes between
6590 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6591 */
6592 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6593
6594 if (op == NEON_2RM_AESE) {
6595 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6596 } else {
6597 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6598 }
6599 tcg_temp_free_ptr(ptr1);
6600 tcg_temp_free_ptr(ptr2);
6601 tcg_temp_free_i32(tmp3);
6602 break;
6603 case NEON_2RM_SHA1H:
6604 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6605 return 1;
6606 }
6607 ptr1 = vfp_reg_ptr(true, rd);
6608 ptr2 = vfp_reg_ptr(true, rm);
6609
6610 gen_helper_crypto_sha1h(ptr1, ptr2);
6611
6612 tcg_temp_free_ptr(ptr1);
6613 tcg_temp_free_ptr(ptr2);
6614 break;
6615 case NEON_2RM_SHA1SU1:
6616 if ((rm | rd) & 1) {
6617 return 1;
6618 }
6619 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6620 if (q) {
6621 if (!dc_isar_feature(aa32_sha2, s)) {
6622 return 1;
6623 }
6624 } else if (!dc_isar_feature(aa32_sha1, s)) {
6625 return 1;
6626 }
6627 ptr1 = vfp_reg_ptr(true, rd);
6628 ptr2 = vfp_reg_ptr(true, rm);
6629 if (q) {
6630 gen_helper_crypto_sha256su0(ptr1, ptr2);
6631 } else {
6632 gen_helper_crypto_sha1su1(ptr1, ptr2);
6633 }
6634 tcg_temp_free_ptr(ptr1);
6635 tcg_temp_free_ptr(ptr2);
6636 break;
6637
6638 case NEON_2RM_VMVN:
6639 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6640 break;
6641 case NEON_2RM_VNEG:
6642 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6643 break;
6644 case NEON_2RM_VABS:
6645 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6646 break;
6647
6648 case NEON_2RM_VCEQ0:
6649 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6650 break;
6651 case NEON_2RM_VCGT0:
6652 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6653 break;
6654 case NEON_2RM_VCLE0:
6655 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6656 break;
6657 case NEON_2RM_VCGE0:
6658 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6659 break;
6660 case NEON_2RM_VCLT0:
6661 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6662 break;
6663
6664 default:
6665 elementwise:
6666 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6667 tmp = neon_load_reg(rm, pass);
6668 switch (op) {
6669 case NEON_2RM_VREV32:
6670 switch (size) {
6671 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6672 case 1: gen_swap_half(tmp); break;
6673 default: abort();
6674 }
6675 break;
6676 case NEON_2RM_VREV16:
6677 gen_rev16(tmp, tmp);
6678 break;
6679 case NEON_2RM_VCLS:
6680 switch (size) {
6681 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6682 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6683 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6684 default: abort();
6685 }
6686 break;
6687 case NEON_2RM_VCLZ:
6688 switch (size) {
6689 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6690 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6691 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6692 default: abort();
6693 }
6694 break;
6695 case NEON_2RM_VCNT:
6696 gen_helper_neon_cnt_u8(tmp, tmp);
6697 break;
6698 case NEON_2RM_VQABS:
6699 switch (size) {
6700 case 0:
6701 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6702 break;
6703 case 1:
6704 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6705 break;
6706 case 2:
6707 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6708 break;
6709 default: abort();
6710 }
6711 break;
6712 case NEON_2RM_VQNEG:
6713 switch (size) {
6714 case 0:
6715 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6716 break;
6717 case 1:
6718 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6719 break;
6720 case 2:
6721 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6722 break;
6723 default: abort();
6724 }
6725 break;
6726 case NEON_2RM_VCGT0_F:
6727 {
6728 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6729 tmp2 = tcg_const_i32(0);
6730 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6731 tcg_temp_free_i32(tmp2);
6732 tcg_temp_free_ptr(fpstatus);
6733 break;
6734 }
6735 case NEON_2RM_VCGE0_F:
6736 {
6737 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6738 tmp2 = tcg_const_i32(0);
6739 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6740 tcg_temp_free_i32(tmp2);
6741 tcg_temp_free_ptr(fpstatus);
6742 break;
6743 }
6744 case NEON_2RM_VCEQ0_F:
6745 {
6746 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6747 tmp2 = tcg_const_i32(0);
6748 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6749 tcg_temp_free_i32(tmp2);
6750 tcg_temp_free_ptr(fpstatus);
6751 break;
6752 }
6753 case NEON_2RM_VCLE0_F:
6754 {
6755 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6756 tmp2 = tcg_const_i32(0);
6757 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6758 tcg_temp_free_i32(tmp2);
6759 tcg_temp_free_ptr(fpstatus);
6760 break;
6761 }
6762 case NEON_2RM_VCLT0_F:
6763 {
6764 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6765 tmp2 = tcg_const_i32(0);
6766 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6767 tcg_temp_free_i32(tmp2);
6768 tcg_temp_free_ptr(fpstatus);
6769 break;
6770 }
6771 case NEON_2RM_VABS_F:
6772 gen_helper_vfp_abss(tmp, tmp);
6773 break;
6774 case NEON_2RM_VNEG_F:
6775 gen_helper_vfp_negs(tmp, tmp);
6776 break;
6777 case NEON_2RM_VSWP:
6778 tmp2 = neon_load_reg(rd, pass);
6779 neon_store_reg(rm, pass, tmp2);
6780 break;
6781 case NEON_2RM_VTRN:
6782 tmp2 = neon_load_reg(rd, pass);
6783 switch (size) {
6784 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6785 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6786 default: abort();
6787 }
6788 neon_store_reg(rm, pass, tmp2);
6789 break;
6790 case NEON_2RM_VRINTN:
6791 case NEON_2RM_VRINTA:
6792 case NEON_2RM_VRINTM:
6793 case NEON_2RM_VRINTP:
6794 case NEON_2RM_VRINTZ:
6795 {
6796 TCGv_i32 tcg_rmode;
6797 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6798 int rmode;
6799
6800 if (op == NEON_2RM_VRINTZ) {
6801 rmode = FPROUNDING_ZERO;
6802 } else {
6803 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6804 }
6805
6806 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6807 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6808 cpu_env);
6809 gen_helper_rints(tmp, tmp, fpstatus);
6810 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6811 cpu_env);
6812 tcg_temp_free_ptr(fpstatus);
6813 tcg_temp_free_i32(tcg_rmode);
6814 break;
6815 }
6816 case NEON_2RM_VRINTX:
6817 {
6818 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6819 gen_helper_rints_exact(tmp, tmp, fpstatus);
6820 tcg_temp_free_ptr(fpstatus);
6821 break;
6822 }
6823 case NEON_2RM_VCVTAU:
6824 case NEON_2RM_VCVTAS:
6825 case NEON_2RM_VCVTNU:
6826 case NEON_2RM_VCVTNS:
6827 case NEON_2RM_VCVTPU:
6828 case NEON_2RM_VCVTPS:
6829 case NEON_2RM_VCVTMU:
6830 case NEON_2RM_VCVTMS:
6831 {
6832 bool is_signed = !extract32(insn, 7, 1);
6833 TCGv_ptr fpst = get_fpstatus_ptr(1);
6834 TCGv_i32 tcg_rmode, tcg_shift;
6835 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6836
6837 tcg_shift = tcg_const_i32(0);
6838 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6839 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6840 cpu_env);
6841
6842 if (is_signed) {
6843 gen_helper_vfp_tosls(tmp, tmp,
6844 tcg_shift, fpst);
6845 } else {
6846 gen_helper_vfp_touls(tmp, tmp,
6847 tcg_shift, fpst);
6848 }
6849
6850 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6851 cpu_env);
6852 tcg_temp_free_i32(tcg_rmode);
6853 tcg_temp_free_i32(tcg_shift);
6854 tcg_temp_free_ptr(fpst);
6855 break;
6856 }
6857 case NEON_2RM_VRECPE:
6858 gen_helper_recpe_u32(tmp, tmp);
6859 break;
6860 case NEON_2RM_VRSQRTE:
6861 gen_helper_rsqrte_u32(tmp, tmp);
6862 break;
6863 case NEON_2RM_VRECPE_F:
6864 {
6865 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6866 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6867 tcg_temp_free_ptr(fpstatus);
6868 break;
6869 }
6870 case NEON_2RM_VRSQRTE_F:
6871 {
6872 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6873 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6874 tcg_temp_free_ptr(fpstatus);
6875 break;
6876 }
6877 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6878 {
6879 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6880 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6881 tcg_temp_free_ptr(fpstatus);
6882 break;
6883 }
6884 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6885 {
6886 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6887 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6888 tcg_temp_free_ptr(fpstatus);
6889 break;
6890 }
6891 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6892 {
6893 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6894 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6895 tcg_temp_free_ptr(fpstatus);
6896 break;
6897 }
6898 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6899 {
6900 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6901 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6902 tcg_temp_free_ptr(fpstatus);
6903 break;
6904 }
6905 default:
6906 /* Reserved op values were caught by the
6907 * neon_2rm_sizes[] check earlier.
6908 */
6909 abort();
6910 }
6911 neon_store_reg(rd, pass, tmp);
6912 }
6913 break;
6914 }
6915 } else if ((insn & (1 << 10)) == 0) {
6916 /* VTBL, VTBX. */
6917 int n = ((insn >> 8) & 3) + 1;
6918 if ((rn + n) > 32) {
6919 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6920 * helper function running off the end of the register file.
6921 */
6922 return 1;
6923 }
6924 n <<= 3;
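/* n is now the table length in bytes (8, 16, 24 or 32). */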
6925 if (insn & (1 << 6)) {
6926 tmp = neon_load_reg(rd, 0);
6927 } else {
6928 tmp = tcg_temp_new_i32();
6929 tcg_gen_movi_i32(tmp, 0);
6930 }
6931 tmp2 = neon_load_reg(rm, 0);
6932 ptr1 = vfp_reg_ptr(true, rn);
6933 tmp5 = tcg_const_i32(n);
6934 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6935 tcg_temp_free_i32(tmp);
6936 if (insn & (1 << 6)) {
6937 tmp = neon_load_reg(rd, 1);
6938 } else {
6939 tmp = tcg_temp_new_i32();
6940 tcg_gen_movi_i32(tmp, 0);
6941 }
6942 tmp3 = neon_load_reg(rm, 1);
6943 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6944 tcg_temp_free_i32(tmp5);
6945 tcg_temp_free_ptr(ptr1);
6946 neon_store_reg(rd, 0, tmp2);
6947 neon_store_reg(rd, 1, tmp3);
6948 tcg_temp_free_i32(tmp);
6949 } else if ((insn & 0x380) == 0) {
6950 /* VDUP */
6951 int element;
6952 MemOp size;
6953
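/* The imm4 field (insn[19:16]) encodes both element size and index:
 * xxx1 selects bytes, xx10 halfwords and x100 words.
 */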
6954 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6955 return 1;
6956 }
6957 if (insn & (1 << 16)) {
6958 size = MO_8;
6959 element = (insn >> 17) & 7;
6960 } else if (insn & (1 << 17)) {
6961 size = MO_16;
6962 element = (insn >> 18) & 3;
6963 } else {
6964 size = MO_32;
6965 element = (insn >> 19) & 1;
6966 }
6967 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6968 neon_element_offset(rm, element, size),
6969 q ? 16 : 8, q ? 16 : 8);
6970 } else {
6971 return 1;
6972 }
6973 }
6974 }
6975 return 0;
6976 }
6977
6978 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6979 {
6980 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6981 const ARMCPRegInfo *ri;
6982
6983 cpnum = (insn >> 8) & 0xf;
6984
6985 /* First check for coprocessor space used for XScale/iwMMXt insns */
6986 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6987 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6988 return 1;
6989 }
6990 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6991 return disas_iwmmxt_insn(s, insn);
6992 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6993 return disas_dsp_insn(s, insn);
6994 }
6995 return 1;
6996 }
6997
6998 /* Otherwise treat as a generic register access */
6999 is64 = (insn & (1 << 25)) == 0;
7000 if (!is64 && ((insn & (1 << 4)) == 0)) {
7001 /* cdp */
7002 return 1;
7003 }
7004
7005 crm = insn & 0xf;
7006 if (is64) {
7007 crn = 0;
7008 opc1 = (insn >> 4) & 0xf;
7009 opc2 = 0;
7010 rt2 = (insn >> 16) & 0xf;
7011 } else {
7012 crn = (insn >> 16) & 0xf;
7013 opc1 = (insn >> 21) & 7;
7014 opc2 = (insn >> 5) & 7;
7015 rt2 = 0;
7016 }
7017 isread = (insn >> 20) & 1;
7018 rt = (insn >> 12) & 0xf;
7019
7020 ri = get_arm_cp_reginfo(s->cp_regs,
7021 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7022 if (ri) {
7023 bool need_exit_tb;
7024
7025 /* Check access permissions */
7026 if (!cp_access_ok(s->current_el, ri, isread)) {
7027 return 1;
7028 }
7029
7030 if (s->hstr_active || ri->accessfn ||
7031 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7032 /* Emit code to perform further access permissions checks at
7033 * runtime; this may result in an exception.
7034 * Note that on XScale all cp0..cp13 registers do an access check
7035 * call in order to handle c15_cpar.
7036 */
7037 TCGv_ptr tmpptr;
7038 TCGv_i32 tcg_syn, tcg_isread;
7039 uint32_t syndrome;
7040
7041 /* Note that since we are an implementation which takes an
7042 * exception on a trapped conditional instruction only if the
7043 * instruction passes its condition code check, we can take
7044 * advantage of the clause in the ARM ARM that allows us to set
7045 * the COND field in the instruction to 0xE in all cases.
7046 * We could fish the actual condition out of the insn (ARM)
7047 * or the condexec bits (Thumb) but it isn't necessary.
7048 */
7049 switch (cpnum) {
7050 case 14:
7051 if (is64) {
7052 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7053 isread, false);
7054 } else {
7055 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7056 rt, isread, false);
7057 }
7058 break;
7059 case 15:
7060 if (is64) {
7061 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7062 isread, false);
7063 } else {
7064 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7065 rt, isread, false);
7066 }
7067 break;
7068 default:
7069 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7070 * so this can only happen if this is an ARMv7 or earlier CPU,
7071 * in which case the syndrome information won't actually be
7072 * guest visible.
7073 */
7074 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7075 syndrome = syn_uncategorized();
7076 break;
7077 }
7078
7079 gen_set_condexec(s);
7080 gen_set_pc_im(s, s->pc_curr);
7081 tmpptr = tcg_const_ptr(ri);
7082 tcg_syn = tcg_const_i32(syndrome);
7083 tcg_isread = tcg_const_i32(isread);
7084 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7085 tcg_isread);
7086 tcg_temp_free_ptr(tmpptr);
7087 tcg_temp_free_i32(tcg_syn);
7088 tcg_temp_free_i32(tcg_isread);
7089 } else if (ri->type & ARM_CP_RAISES_EXC) {
7090 /*
7091 * The readfn or writefn might raise an exception;
7092 * synchronize the CPU state in case it does.
7093 */
7094 gen_set_condexec(s);
7095 gen_set_pc_im(s, s->pc_curr);
7096 }
7097
7098 /* Handle special cases first */
7099 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7100 case ARM_CP_NOP:
7101 return 0;
7102 case ARM_CP_WFI:
7103 if (isread) {
7104 return 1;
7105 }
7106 gen_set_pc_im(s, s->base.pc_next);
7107 s->base.is_jmp = DISAS_WFI;
7108 return 0;
7109 default:
7110 break;
7111 }
7112
7113 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7114 gen_io_start();
7115 }
7116
7117 if (isread) {
7118 /* Read */
7119 if (is64) {
7120 TCGv_i64 tmp64;
7121 TCGv_i32 tmp;
7122 if (ri->type & ARM_CP_CONST) {
7123 tmp64 = tcg_const_i64(ri->resetvalue);
7124 } else if (ri->readfn) {
7125 TCGv_ptr tmpptr;
7126 tmp64 = tcg_temp_new_i64();
7127 tmpptr = tcg_const_ptr(ri);
7128 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7129 tcg_temp_free_ptr(tmpptr);
7130 } else {
7131 tmp64 = tcg_temp_new_i64();
7132 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7133 }
7134 tmp = tcg_temp_new_i32();
7135 tcg_gen_extrl_i64_i32(tmp, tmp64);
7136 store_reg(s, rt, tmp);
7137 tmp = tcg_temp_new_i32();
7138 tcg_gen_extrh_i64_i32(tmp, tmp64);
7139 tcg_temp_free_i64(tmp64);
7140 store_reg(s, rt2, tmp);
7141 } else {
7142 TCGv_i32 tmp;
7143 if (ri->type & ARM_CP_CONST) {
7144 tmp = tcg_const_i32(ri->resetvalue);
7145 } else if (ri->readfn) {
7146 TCGv_ptr tmpptr;
7147 tmp = tcg_temp_new_i32();
7148 tmpptr = tcg_const_ptr(ri);
7149 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7150 tcg_temp_free_ptr(tmpptr);
7151 } else {
7152 tmp = load_cpu_offset(ri->fieldoffset);
7153 }
7154 if (rt == 15) {
7155 /* A destination register of r15 for 32-bit loads sets
7156 * the condition codes from the high 4 bits of the value.
7157 */
7158 gen_set_nzcv(tmp);
7159 tcg_temp_free_i32(tmp);
7160 } else {
7161 store_reg(s, rt, tmp);
7162 }
7163 }
7164 } else {
7165 /* Write */
7166 if (ri->type & ARM_CP_CONST) {
7167 /* If not forbidden by access permissions, treat as WI */
7168 return 0;
7169 }
7170
7171 if (is64) {
7172 TCGv_i32 tmplo, tmphi;
7173 TCGv_i64 tmp64 = tcg_temp_new_i64();
7174 tmplo = load_reg(s, rt);
7175 tmphi = load_reg(s, rt2);
7176 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7177 tcg_temp_free_i32(tmplo);
7178 tcg_temp_free_i32(tmphi);
7179 if (ri->writefn) {
7180 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7181 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7182 tcg_temp_free_ptr(tmpptr);
7183 } else {
7184 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7185 }
7186 tcg_temp_free_i64(tmp64);
7187 } else {
7188 if (ri->writefn) {
7189 TCGv_i32 tmp;
7190 TCGv_ptr tmpptr;
7191 tmp = load_reg(s, rt);
7192 tmpptr = tcg_const_ptr(ri);
7193 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7194 tcg_temp_free_ptr(tmpptr);
7195 tcg_temp_free_i32(tmp);
7196 } else {
7197 TCGv_i32 tmp = load_reg(s, rt);
7198 store_cpu_offset(tmp, ri->fieldoffset);
7199 }
7200 }
7201 }
7202
7203 /* I/O operations must end the TB here (whether read or write) */
7204 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7205 (ri->type & ARM_CP_IO));
7206
7207 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7208 /*
7209 * A write to any coprocessor register that ends a TB
7210 * must rebuild the hflags for the next TB.
7211 */
7212 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7213 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7214 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7215 } else {
7216 if (ri->type & ARM_CP_NEWEL) {
7217 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7218 } else {
7219 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7220 }
7221 }
7222 tcg_temp_free_i32(tcg_el);
7223 /*
7224 * We default to ending the TB on a coprocessor register write,
7225 * but allow this to be suppressed by the register definition
7226 * (usually only necessary to work around guest bugs).
7227 */
7228 need_exit_tb = true;
7229 }
7230 if (need_exit_tb) {
7231 gen_lookup_tb(s);
7232 }
7233
7234 return 0;
7235 }
7236
7237 /* Unknown register; this might be a guest error or a QEMU
7238 * unimplemented feature.
7239 */
7240 if (is64) {
7241 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7242 "64 bit system register cp:%d opc1: %d crm:%d "
7243 "(%s)\n",
7244 isread ? "read" : "write", cpnum, opc1, crm,
7245 s->ns ? "non-secure" : "secure");
7246 } else {
7247 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7248 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7249 "(%s)\n",
7250 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7251 s->ns ? "non-secure" : "secure");
7252 }
7253
7254 return 1;
7255 }
7256
7257
7258 /* Store a 64-bit value to a register pair. Clobbers val. */
7259 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7260 {
7261 TCGv_i32 tmp;
7262 tmp = tcg_temp_new_i32();
7263 tcg_gen_extrl_i64_i32(tmp, val);
7264 store_reg(s, rlow, tmp);
7265 tmp = tcg_temp_new_i32();
7266 tcg_gen_extrh_i64_i32(tmp, val);
7267 store_reg(s, rhigh, tmp);
7268 }
7269
7270 /* Load and add a 64-bit value from a register pair. */
7271 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7272 {
7273 TCGv_i64 tmp;
7274 TCGv_i32 tmpl;
7275 TCGv_i32 tmph;
7276
7277 /* Load 64-bit value rd:rn. */
7278 tmpl = load_reg(s, rlow);
7279 tmph = load_reg(s, rhigh);
7280 tmp = tcg_temp_new_i64();
7281 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7282 tcg_temp_free_i32(tmpl);
7283 tcg_temp_free_i32(tmph);
7284 tcg_gen_add_i64(val, val, tmp);
7285 tcg_temp_free_i64(tmp);
7286 }
7287
7288 /* Set N and Z flags from hi|lo. */
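/* cpu_NF takes its sign from the high word; cpu_ZF holds a value
 * that is zero exactly when Z should be set, so OR-ing the two
 * halves is sufficient.
 */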
7289 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7290 {
7291 tcg_gen_mov_i32(cpu_NF, hi);
7292 tcg_gen_or_i32(cpu_ZF, lo, hi);
7293 }
7294
7295 /* Load/Store exclusive instructions are implemented by remembering
7296 the value/address loaded, and seeing if these are the same
7297 when the store is performed. This should be sufficient to implement
7298 the architecturally mandated semantics, and avoids having to monitor
7299 regular stores. The compare vs the remembered value is done during
7300 the cmpxchg operation, but we must compare the addresses manually. */
7301 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7302 TCGv_i32 addr, int size)
7303 {
7304 TCGv_i32 tmp = tcg_temp_new_i32();
7305 MemOp opc = size | MO_ALIGN | s->be_data;
7306
7307 s->is_ldex = true;
7308
7309 if (size == 3) {
7310 TCGv_i32 tmp2 = tcg_temp_new_i32();
7311 TCGv_i64 t64 = tcg_temp_new_i64();
7312
7313 /* For AArch32, architecturally the 32-bit word at the lowest
7314 * address is always Rt and the one at addr+4 is Rt2, even if
7315 * the CPU is big-endian. That means we don't want to do a
7316 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7317 * for an architecturally 64-bit access, but instead do a
7318 * 64-bit access using MO_BE if appropriate and then split
7319 * the two halves.
7320 * This only makes a difference for BE32 user-mode, where
7321 * frob64() must not flip the two halves of the 64-bit data
7322 * but this code must treat BE32 user-mode like BE32 system.
7323 */
7324 TCGv taddr = gen_aa32_addr(s, addr, opc);
7325
7326 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7327 tcg_temp_free(taddr);
7328 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7329 if (s->be_data == MO_BE) {
7330 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7331 } else {
7332 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7333 }
7334 tcg_temp_free_i64(t64);
7335
7336 store_reg(s, rt2, tmp2);
7337 } else {
7338 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7339 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7340 }
7341
7342 store_reg(s, rt, tmp);
7343 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7344 }
7345
7346 static void gen_clrex(DisasContext *s)
7347 {
7348 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7349 }
7350
7351 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7352 TCGv_i32 addr, int size)
7353 {
7354 TCGv_i32 t0, t1, t2;
7355 TCGv_i64 extaddr;
7356 TCGv taddr;
7357 TCGLabel *done_label;
7358 TCGLabel *fail_label;
7359 MemOp opc = size | MO_ALIGN | s->be_data;
7360
7361 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7362 [addr] = {Rt};
7363 {Rd} = 0;
7364 } else {
7365 {Rd} = 1;
7366 } */
7367 fail_label = gen_new_label();
7368 done_label = gen_new_label();
7369 extaddr = tcg_temp_new_i64();
7370 tcg_gen_extu_i32_i64(extaddr, addr);
7371 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7372 tcg_temp_free_i64(extaddr);
7373
7374 taddr = gen_aa32_addr(s, addr, opc);
7375 t0 = tcg_temp_new_i32();
7376 t1 = load_reg(s, rt);
7377 if (size == 3) {
7378 TCGv_i64 o64 = tcg_temp_new_i64();
7379 TCGv_i64 n64 = tcg_temp_new_i64();
7380
7381 t2 = load_reg(s, rt2);
7382 /* For AArch32, architecturally the 32-bit word at the lowest
7383 * address is always Rt and the one at addr+4 is Rt2, even if
7384 * the CPU is big-endian. Since we're going to treat this as a
7385 * single 64-bit BE store, we need to put the two halves in the
7386 * opposite order for BE than for LE, so that they end up in the
7387 * right places.
7388 * We don't want gen_aa32_frob64() because that does the wrong
7389 * thing for BE32 usermode.
7390 */
7391 if (s->be_data == MO_BE) {
7392 tcg_gen_concat_i32_i64(n64, t2, t1);
7393 } else {
7394 tcg_gen_concat_i32_i64(n64, t1, t2);
7395 }
7396 tcg_temp_free_i32(t2);
7397
7398 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7399 get_mem_index(s), opc);
7400 tcg_temp_free_i64(n64);
7401
7402 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7403 tcg_gen_extrl_i64_i32(t0, o64);
7404
7405 tcg_temp_free_i64(o64);
7406 } else {
7407 t2 = tcg_temp_new_i32();
7408 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7409 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7410 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7411 tcg_temp_free_i32(t2);
7412 }
7413 tcg_temp_free_i32(t1);
7414 tcg_temp_free(taddr);
7415 tcg_gen_mov_i32(cpu_R[rd], t0);
7416 tcg_temp_free_i32(t0);
7417 tcg_gen_br(done_label);
7418
7419 gen_set_label(fail_label);
7420 tcg_gen_movi_i32(cpu_R[rd], 1);
7421 gen_set_label(done_label);
7422 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7423 }
7424
7425 /* gen_srs:
7426 * @env: CPUARMState
7427 * @s: DisasContext
7428 * @mode: mode field from insn (which stack to store to)
7429 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7430 * @writeback: true if writeback bit set
7431 *
7432 * Generate code for the SRS (Store Return State) insn.
7433 */
7434 static void gen_srs(DisasContext *s,
7435 uint32_t mode, uint32_t amode, bool writeback)
7436 {
7437 int32_t offset;
7438 TCGv_i32 addr, tmp;
7439 bool undef = false;
7440
7441 /* SRS is:
7442 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7443 * and specified mode is monitor mode
7444 * - UNDEFINED in Hyp mode
7445 * - UNPREDICTABLE in User or System mode
7446 * - UNPREDICTABLE if the specified mode is:
7447 * -- not implemented
7448 * -- not a valid mode number
7449 * -- a mode that's at a higher exception level
7450 * -- Monitor, if we are Non-secure
7451 * For the UNPREDICTABLE cases we choose to UNDEF.
7452 */
7453 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7454 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7455 return;
7456 }
7457
7458 if (s->current_el == 0 || s->current_el == 2) {
7459 undef = true;
7460 }
7461
7462 switch (mode) {
7463 case ARM_CPU_MODE_USR:
7464 case ARM_CPU_MODE_FIQ:
7465 case ARM_CPU_MODE_IRQ:
7466 case ARM_CPU_MODE_SVC:
7467 case ARM_CPU_MODE_ABT:
7468 case ARM_CPU_MODE_UND:
7469 case ARM_CPU_MODE_SYS:
7470 break;
7471 case ARM_CPU_MODE_HYP:
7472 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7473 undef = true;
7474 }
7475 break;
7476 case ARM_CPU_MODE_MON:
7477 /* No need to check specifically for "are we non-secure" because
7478 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7479 * so if this isn't EL3 then we must be non-secure.
7480 */
7481 if (s->current_el != 3) {
7482 undef = true;
7483 }
7484 break;
7485 default:
7486 undef = true;
7487 }
7488
7489 if (undef) {
7490 unallocated_encoding(s);
7491 return;
7492 }
7493
7494 addr = tcg_temp_new_i32();
7495 tmp = tcg_const_i32(mode);
7496 /* get_r13_banked() will raise an exception if called from System mode */
7497 gen_set_condexec(s);
7498 gen_set_pc_im(s, s->pc_curr);
7499 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7500 tcg_temp_free_i32(tmp);
7501 switch (amode) {
7502 case 0: /* DA */
7503 offset = -4;
7504 break;
7505 case 1: /* IA */
7506 offset = 0;
7507 break;
7508 case 2: /* DB */
7509 offset = -8;
7510 break;
7511 case 3: /* IB */
7512 offset = 4;
7513 break;
7514 default:
7515 abort();
7516 }
7517 tcg_gen_addi_i32(addr, addr, offset);
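/* Store LR at the adjusted address; the SPSR goes in the word above it. */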
7518 tmp = load_reg(s, 14);
7519 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7520 tcg_temp_free_i32(tmp);
7521 tmp = load_cpu_field(spsr);
7522 tcg_gen_addi_i32(addr, addr, 4);
7523 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7524 tcg_temp_free_i32(tmp);
7525 if (writeback) {
7526 switch (amode) {
7527 case 0:
7528 offset = -8;
7529 break;
7530 case 1:
7531 offset = 4;
7532 break;
7533 case 2:
7534 offset = -4;
7535 break;
7536 case 3:
7537 offset = 0;
7538 break;
7539 default:
7540 abort();
7541 }
7542 tcg_gen_addi_i32(addr, addr, offset);
7543 tmp = tcg_const_i32(mode);
7544 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7545 tcg_temp_free_i32(tmp);
7546 }
7547 tcg_temp_free_i32(addr);
7548 s->base.is_jmp = DISAS_UPDATE;
7549 }
7550
7551 /* Generate a label used for skipping this instruction */
7552 static void arm_gen_condlabel(DisasContext *s)
7553 {
7554 if (!s->condjmp) {
7555 s->condlabel = gen_new_label();
7556 s->condjmp = 1;
7557 }
7558 }
7559
7560 /* Skip this instruction if the ARM condition is false */
7561 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7562 {
7563 arm_gen_condlabel(s);
7564 arm_gen_test_cc(cond ^ 1, s->condlabel);
7565 }
7566
7567
7568 /*
7569 * Constant expanders for the decoders.
7570 */
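/* These expanders are referenced (via !function) from the decodetree
 * (.decode) files that generate the decoders included further below.
 */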
7571
7572 static int negate(DisasContext *s, int x)
7573 {
7574 return -x;
7575 }
7576
7577 static int plus_2(DisasContext *s, int x)
7578 {
7579 return x + 2;
7580 }
7581
7582 static int times_2(DisasContext *s, int x)
7583 {
7584 return x * 2;
7585 }
7586
7587 static int times_4(DisasContext *s, int x)
7588 {
7589 return x * 4;
7590 }
7591
7592 /* Return only the rotation part of T32ExpandImm. */
7593 static int t32_expandimm_rot(DisasContext *s, int x)
7594 {
7595 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7596 }
7597
7598 /* Return the unrotated immediate from T32ExpandImm. */
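/* For example, an encoded value of 0x3ab (pattern 3, byte 0xab)
 * expands to 0xabababab; encodings with bits [11:10] non-zero
 * instead return 0x80 | imm[6:0], which the caller rotates by
 * t32_expandimm_rot().
 */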
7599 static int t32_expandimm_imm(DisasContext *s, int x)
7600 {
7601 int imm = extract32(x, 0, 8);
7602
7603 switch (extract32(x, 8, 4)) {
7604 case 0: /* XY */
7605 /* Nothing to do. */
7606 break;
7607 case 1: /* 00XY00XY */
7608 imm *= 0x00010001;
7609 break;
7610 case 2: /* XY00XY00 */
7611 imm *= 0x01000100;
7612 break;
7613 case 3: /* XYXYXYXY */
7614 imm *= 0x01010101;
7615 break;
7616 default:
7617 /* Rotated constant. */
7618 imm |= 0x80;
7619 break;
7620 }
7621 return imm;
7622 }
7623
7624 static int t32_branch24(DisasContext *s, int x)
7625 {
7626 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
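/* x arrives sign-extended from the S bit, so when S == 0 (x >= 0)
 * both J bits must be flipped to recover I1:I2.
 */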
7627 x ^= !(x < 0) * (3 << 21);
7628 /* Append the final zero. */
7629 return x << 1;
7630 }
7631
7632 static int t16_setflags(DisasContext *s)
7633 {
7634 return s->condexec_mask == 0;
7635 }
7636
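/* T16 PUSH/POP register lists: bit 8 of the encoding adds LR (r14)
 * for PUSH and PC (r15) for POP to the r0-r7 set.
 */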
7637 static int t16_push_list(DisasContext *s, int x)
7638 {
7639 return (x & 0xff) | (x & 0x100) << (14 - 8);
7640 }
7641
7642 static int t16_pop_list(DisasContext *s, int x)
7643 {
7644 return (x & 0xff) | (x & 0x100) << (15 - 8);
7645 }
7646
7647 /*
7648 * Include the generated decoders.
7649 */
7650
7651 #include "decode-a32.inc.c"
7652 #include "decode-a32-uncond.inc.c"
7653 #include "decode-t32.inc.c"
7654 #include "decode-t16.inc.c"
7655
7656 /* Helpers to swap operands for reverse-subtract. */
7657 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7658 {
7659 tcg_gen_sub_i32(dst, b, a);
7660 }
7661
7662 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7663 {
7664 gen_sub_CC(dst, b, a);
7665 }
7666
7667 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7668 {
7669 gen_sub_carry(dest, b, a);
7670 }
7671
7672 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7673 {
7674 gen_sbc_CC(dest, b, a);
7675 }
7676
7677 /*
7678 * Helpers for the data processing routines.
7679 *
7680 * After the computation store the results back.
7681 * This may be suppressed altogether (STREG_NONE), require a runtime
7682 * check against the stack limits (STREG_SP_CHECK), or generate an
7683 * exception return. Oh, or store into a register.
7684 *
7685 * Always return true, indicating success for a trans_* function.
7686 */
7687 typedef enum {
7688 STREG_NONE,
7689 STREG_NORMAL,
7690 STREG_SP_CHECK,
7691 STREG_EXC_RET,
7692 } StoreRegKind;
7693
7694 static bool store_reg_kind(DisasContext *s, int rd,
7695 TCGv_i32 val, StoreRegKind kind)
7696 {
7697 switch (kind) {
7698 case STREG_NONE:
7699 tcg_temp_free_i32(val);
7700 return true;
7701 case STREG_NORMAL:
7702 /* See ALUWritePC: Interworking only from a32 mode. */
7703 if (s->thumb) {
7704 store_reg(s, rd, val);
7705 } else {
7706 store_reg_bx(s, rd, val);
7707 }
7708 return true;
7709 case STREG_SP_CHECK:
7710 store_sp_checked(s, val);
7711 return true;
7712 case STREG_EXC_RET:
7713 gen_exception_return(s, val);
7714 return true;
7715 }
7716 g_assert_not_reached();
7717 }
7718
7719 /*
7720 * Data-processing (register)
7721 *
7722 * Operate, with set flags, one register source,
7723 * one immediate shifted register source, and a destination.
7724 */
7725 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7726 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7727 int logic_cc, StoreRegKind kind)
7728 {
7729 TCGv_i32 tmp1, tmp2;
7730
7731 tmp2 = load_reg(s, a->rm);
7732 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7733 tmp1 = load_reg(s, a->rn);
7734
7735 gen(tmp1, tmp1, tmp2);
7736 tcg_temp_free_i32(tmp2);
7737
7738 if (logic_cc) {
7739 gen_logic_CC(tmp1);
7740 }
7741 return store_reg_kind(s, a->rd, tmp1, kind);
7742 }
7743
7744 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7745 void (*gen)(TCGv_i32, TCGv_i32),
7746 int logic_cc, StoreRegKind kind)
7747 {
7748 TCGv_i32 tmp;
7749
7750 tmp = load_reg(s, a->rm);
7751 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7752
7753 gen(tmp, tmp);
7754 if (logic_cc) {
7755 gen_logic_CC(tmp);
7756 }
7757 return store_reg_kind(s, a->rd, tmp, kind);
7758 }
7759
7760 /*
7761 * Data-processing (register-shifted register)
7762 *
7763 * Operate, with set flags, one register source,
7764 * one register shifted register source, and a destination.
7765 */
7766 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7767 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7768 int logic_cc, StoreRegKind kind)
7769 {
7770 TCGv_i32 tmp1, tmp2;
7771
7772 tmp1 = load_reg(s, a->rs);
7773 tmp2 = load_reg(s, a->rm);
7774 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7775 tmp1 = load_reg(s, a->rn);
7776
7777 gen(tmp1, tmp1, tmp2);
7778 tcg_temp_free_i32(tmp2);
7779
7780 if (logic_cc) {
7781 gen_logic_CC(tmp1);
7782 }
7783 return store_reg_kind(s, a->rd, tmp1, kind);
7784 }
7785
7786 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7787 void (*gen)(TCGv_i32, TCGv_i32),
7788 int logic_cc, StoreRegKind kind)
7789 {
7790 TCGv_i32 tmp1, tmp2;
7791
7792 tmp1 = load_reg(s, a->rs);
7793 tmp2 = load_reg(s, a->rm);
7794 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7795
7796 gen(tmp2, tmp2);
7797 if (logic_cc) {
7798 gen_logic_CC(tmp2);
7799 }
7800 return store_reg_kind(s, a->rd, tmp2, kind);
7801 }
7802
7803 /*
7804 * Data-processing (immediate)
7805 *
7806 * Operate, with set flags, one register source,
7807 * one rotated immediate, and a destination.
7808 *
7809 * Note that logic_cc && a->rot setting CF based on the msb of the
7810 * immediate is the reason why we must pass in the unrotated form
7811 * of the immediate.
7812 */
7813 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7814 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7815 int logic_cc, StoreRegKind kind)
7816 {
7817 TCGv_i32 tmp1, tmp2;
7818 uint32_t imm;
7819
7820 imm = ror32(a->imm, a->rot);
7821 if (logic_cc && a->rot) {
7822 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7823 }
7824 tmp2 = tcg_const_i32(imm);
7825 tmp1 = load_reg(s, a->rn);
7826
7827 gen(tmp1, tmp1, tmp2);
7828 tcg_temp_free_i32(tmp2);
7829
7830 if (logic_cc) {
7831 gen_logic_CC(tmp1);
7832 }
7833 return store_reg_kind(s, a->rd, tmp1, kind);
7834 }
7835
7836 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7837 void (*gen)(TCGv_i32, TCGv_i32),
7838 int logic_cc, StoreRegKind kind)
7839 {
7840 TCGv_i32 tmp;
7841 uint32_t imm;
7842
7843 imm = ror32(a->imm, a->rot);
7844 if (logic_cc && a->rot) {
7845 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7846 }
7847 tmp = tcg_const_i32(imm);
7848
7849 gen(tmp, tmp);
7850 if (logic_cc) {
7851 gen_logic_CC(tmp);
7852 }
7853 return store_reg_kind(s, a->rd, tmp, kind);
7854 }
7855
7856 #define DO_ANY3(NAME, OP, L, K) \
7857 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7858 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7859 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7860 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7861 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7862 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7863
7864 #define DO_ANY2(NAME, OP, L, K) \
7865 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7866 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7867 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7868 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7869 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7870 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7871
7872 #define DO_CMP2(NAME, OP, L) \
7873 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7874 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7875 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7876 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7877 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7878 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
7879
7880 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7881 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7882 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7883 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7884
7885 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7886 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7887 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7888 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7889
7890 DO_CMP2(TST, tcg_gen_and_i32, true)
7891 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7892 DO_CMP2(CMN, gen_add_CC, false)
7893 DO_CMP2(CMP, gen_sub_CC, false)
7894
7895 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7896 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7897
7898 /*
7899 * Note that for the computation of StoreRegKind we return out of the
7900 * middle of the functions that are expanded by DO_ANY3, and that
7901 * we modify a->s via that parameter before it is used by OP.
7902 */
7903 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7904 ({
7905 StoreRegKind ret = STREG_NORMAL;
7906 if (a->rd == 15 && a->s) {
7907 /*
7908 * See ALUExceptionReturn:
7909 * In User mode, UNPREDICTABLE; we choose UNDEF.
7910 * In Hyp mode, UNDEFINED.
7911 */
7912 if (IS_USER(s) || s->current_el == 2) {
7913 unallocated_encoding(s);
7914 return true;
7915 }
7916 /* There is no writeback of nzcv to PSTATE. */
7917 a->s = 0;
7918 ret = STREG_EXC_RET;
7919 } else if (a->rd == 13 && a->rn == 13) {
7920 ret = STREG_SP_CHECK;
7921 }
7922 ret;
7923 }))
7924
7925 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7926 ({
7927 StoreRegKind ret = STREG_NORMAL;
7928 if (a->rd == 15 && a->s) {
7929 /*
7930 * See ALUExceptionReturn:
7931 * In User mode, UNPREDICTABLE; we choose UNDEF.
7932 * In Hyp mode, UNDEFINED.
7933 */
7934 if (IS_USER(s) || s->current_el == 2) {
7935 unallocated_encoding(s);
7936 return true;
7937 }
7938 /* There is no writeback of nzcv to PSTATE. */
7939 a->s = 0;
7940 ret = STREG_EXC_RET;
7941 } else if (a->rd == 13) {
7942 ret = STREG_SP_CHECK;
7943 }
7944 ret;
7945 }))
7946
7947 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7948
7949 /*
7950 * ORN is only available with T32, so there is no register-shifted-register
7951 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7952 */
7953 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7954 {
7955 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7956 }
7957
7958 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7959 {
7960 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7961 }
7962
7963 #undef DO_ANY3
7964 #undef DO_ANY2
7965 #undef DO_CMP2
7966
7967 static bool trans_ADR(DisasContext *s, arg_ri *a)
7968 {
7969 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7970 return true;
7971 }
7972
7973 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7974 {
7975 TCGv_i32 tmp;
7976
7977 if (!ENABLE_ARCH_6T2) {
7978 return false;
7979 }
7980
7981 tmp = tcg_const_i32(a->imm);
7982 store_reg(s, a->rd, tmp);
7983 return true;
7984 }
7985
7986 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7987 {
7988 TCGv_i32 tmp;
7989
7990 if (!ENABLE_ARCH_6T2) {
7991 return false;
7992 }
7993
7994 tmp = load_reg(s, a->rd);
7995 tcg_gen_ext16u_i32(tmp, tmp);
7996 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7997 store_reg(s, a->rd, tmp);
7998 return true;
7999 }
8000
8001 /*
8002 * Multiply and multiply accumulate
8003 */
8004
8005 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8006 {
8007 TCGv_i32 t1, t2;
8008
8009 t1 = load_reg(s, a->rn);
8010 t2 = load_reg(s, a->rm);
8011 tcg_gen_mul_i32(t1, t1, t2);
8012 tcg_temp_free_i32(t2);
8013 if (add) {
8014 t2 = load_reg(s, a->ra);
8015 tcg_gen_add_i32(t1, t1, t2);
8016 tcg_temp_free_i32(t2);
8017 }
8018 if (a->s) {
8019 gen_logic_CC(t1);
8020 }
8021 store_reg(s, a->rd, t1);
8022 return true;
8023 }
8024
8025 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8026 {
8027 return op_mla(s, a, false);
8028 }
8029
8030 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8031 {
8032 return op_mla(s, a, true);
8033 }
8034
8035 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8036 {
8037 TCGv_i32 t1, t2;
8038
8039 if (!ENABLE_ARCH_6T2) {
8040 return false;
8041 }
8042 t1 = load_reg(s, a->rn);
8043 t2 = load_reg(s, a->rm);
8044 tcg_gen_mul_i32(t1, t1, t2);
8045 tcg_temp_free_i32(t2);
8046 t2 = load_reg(s, a->ra);
8047 tcg_gen_sub_i32(t1, t2, t1);
8048 tcg_temp_free_i32(t2);
8049 store_reg(s, a->rd, t1);
8050 return true;
8051 }
8052
8053 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8054 {
8055 TCGv_i32 t0, t1, t2, t3;
8056
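/*
 * As used here, a->ra receives the low 32 bits of the result and
 * a->rd the high 32 bits; when accumulating, the existing
 * a->rd:a->ra pair is added in via a 64-bit add2.
 */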
8057 t0 = load_reg(s, a->rm);
8058 t1 = load_reg(s, a->rn);
8059 if (uns) {
8060 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8061 } else {
8062 tcg_gen_muls2_i32(t0, t1, t0, t1);
8063 }
8064 if (add) {
8065 t2 = load_reg(s, a->ra);
8066 t3 = load_reg(s, a->rd);
8067 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8068 tcg_temp_free_i32(t2);
8069 tcg_temp_free_i32(t3);
8070 }
8071 if (a->s) {
8072 gen_logicq_cc(t0, t1);
8073 }
8074 store_reg(s, a->ra, t0);
8075 store_reg(s, a->rd, t1);
8076 return true;
8077 }
8078
8079 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8080 {
8081 return op_mlal(s, a, true, false);
8082 }
8083
8084 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8085 {
8086 return op_mlal(s, a, false, false);
8087 }
8088
8089 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8090 {
8091 return op_mlal(s, a, true, true);
8092 }
8093
8094 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8095 {
8096 return op_mlal(s, a, false, true);
8097 }
8098
8099 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8100 {
8101 TCGv_i32 t0, t1, t2, zero;
8102
8103 if (s->thumb
8104 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8105 : !ENABLE_ARCH_6) {
8106 return false;
8107 }
8108
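/*
 * UMAAL computes the unsigned 64-bit value Rn * Rm + Ra + Rd and
 * returns it in a->rd:a->ra. Each 32-bit addend is folded into the
 * 64-bit product with add2 against a zero high word; the total
 * (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1 can never overflow 64 bits.
 */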
8109 t0 = load_reg(s, a->rm);
8110 t1 = load_reg(s, a->rn);
8111 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8112 zero = tcg_const_i32(0);
8113 t2 = load_reg(s, a->ra);
8114 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8115 tcg_temp_free_i32(t2);
8116 t2 = load_reg(s, a->rd);
8117 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8118 tcg_temp_free_i32(t2);
8119 tcg_temp_free_i32(zero);
8120 store_reg(s, a->ra, t0);
8121 store_reg(s, a->rd, t1);
8122 return true;
8123 }
8124
8125 /*
8126 * Saturating addition and subtraction
8127 */
8128
8129 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8130 {
8131 TCGv_i32 t0, t1;
8132
8133 if (s->thumb
8134 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8135 : !ENABLE_ARCH_5TE) {
8136 return false;
8137 }
8138
8139 t0 = load_reg(s, a->rm);
8140 t1 = load_reg(s, a->rn);
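/* For QDADD/QDSUB the doubling of Rn is itself saturating: SAT(Rn + Rn). */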
8141 if (doub) {
8142 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8143 }
8144 if (add) {
8145 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8146 } else {
8147 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8148 }
8149 tcg_temp_free_i32(t1);
8150 store_reg(s, a->rd, t0);
8151 return true;
8152 }
8153
8154 #define DO_QADDSUB(NAME, ADD, DOUB) \
8155 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8156 { \
8157 return op_qaddsub(s, a, ADD, DOUB); \
8158 }
8159
8160 DO_QADDSUB(QADD, true, false)
8161 DO_QADDSUB(QSUB, false, false)
8162 DO_QADDSUB(QDADD, true, true)
8163 DO_QADDSUB(QDSUB, false, true)
8164
8165 #undef DO_QADDSUB
8166
8167 /*
8168 * Halfword multiply and multiply accumulate
8169 */
8170
8171 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8172 int add_long, bool nt, bool mt)
8173 {
8174 TCGv_i32 t0, t1, tl, th;
8175
8176 if (s->thumb
8177 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8178 : !ENABLE_ARCH_5TE) {
8179 return false;
8180 }
8181
8182 t0 = load_reg(s, a->rn);
8183 t1 = load_reg(s, a->rm);
8184 gen_mulxy(t0, t1, nt, mt);
8185 tcg_temp_free_i32(t1);
8186
8187 switch (add_long) {
8188 case 0:
8189 store_reg(s, a->rd, t0);
8190 break;
8191 case 1:
8192 t1 = load_reg(s, a->ra);
8193 gen_helper_add_setq(t0, cpu_env, t0, t1);
8194 tcg_temp_free_i32(t1);
8195 store_reg(s, a->rd, t0);
8196 break;
8197 case 2:
8198 tl = load_reg(s, a->ra);
8199 th = load_reg(s, a->rd);
8200 /* Sign-extend the 32-bit product to 64 bits. */
8201 t1 = tcg_temp_new_i32();
8202 tcg_gen_sari_i32(t1, t0, 31);
8203 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8204 tcg_temp_free_i32(t0);
8205 tcg_temp_free_i32(t1);
8206 store_reg(s, a->ra, tl);
8207 store_reg(s, a->rd, th);
8208 break;
8209 default:
8210 g_assert_not_reached();
8211 }
8212 return true;
8213 }
8214
8215 #define DO_SMLAX(NAME, add, nt, mt) \
8216 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8217 { \
8218 return op_smlaxxx(s, a, add, nt, mt); \
8219 }
8220
8221 DO_SMLAX(SMULBB, 0, 0, 0)
8222 DO_SMLAX(SMULBT, 0, 0, 1)
8223 DO_SMLAX(SMULTB, 0, 1, 0)
8224 DO_SMLAX(SMULTT, 0, 1, 1)
8225
8226 DO_SMLAX(SMLABB, 1, 0, 0)
8227 DO_SMLAX(SMLABT, 1, 0, 1)
8228 DO_SMLAX(SMLATB, 1, 1, 0)
8229 DO_SMLAX(SMLATT, 1, 1, 1)
8230
8231 DO_SMLAX(SMLALBB, 2, 0, 0)
8232 DO_SMLAX(SMLALBT, 2, 0, 1)
8233 DO_SMLAX(SMLALTB, 2, 1, 0)
8234 DO_SMLAX(SMLALTT, 2, 1, 1)
8235
8236 #undef DO_SMLAX
8237
8238 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8239 {
8240 TCGv_i32 t0, t1;
8241
8242 if (!ENABLE_ARCH_5TE) {
8243 return false;
8244 }
8245
8246 t0 = load_reg(s, a->rn);
8247 t1 = load_reg(s, a->rm);
8248 /*
8249 * Since the nominal result is product<47:16>, shift the 16-bit
8250 * input up by 16 bits, so that the result is at product<63:32>.
8251 */
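/*
 * e.g. for SMULWB the architectural result is (Rn * Rm<15:0>)<47:16>;
 * multiplying by Rm<15:0> << 16 instead scales that product by 2^16,
 * so the wanted bits land in <63:32> and we can keep just the high
 * word of the 64-bit multiply below.
 */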
8252 if (mt) {
8253 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8254 } else {
8255 tcg_gen_shli_i32(t1, t1, 16);
8256 }
8257 tcg_gen_muls2_i32(t0, t1, t0, t1);
8258 tcg_temp_free_i32(t0);
8259 if (add) {
8260 t0 = load_reg(s, a->ra);
8261 gen_helper_add_setq(t1, cpu_env, t1, t0);
8262 tcg_temp_free_i32(t0);
8263 }
8264 store_reg(s, a->rd, t1);
8265 return true;
8266 }
8267
8268 #define DO_SMLAWX(NAME, add, mt) \
8269 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8270 { \
8271 return op_smlawx(s, a, add, mt); \
8272 }
8273
8274 DO_SMLAWX(SMULWB, 0, 0)
8275 DO_SMLAWX(SMULWT, 0, 1)
8276 DO_SMLAWX(SMLAWB, 1, 0)
8277 DO_SMLAWX(SMLAWT, 1, 1)
8278
8279 #undef DO_SMLAWX
8280
8281 /*
8282 * MSR (immediate) and hints
8283 */
8284
8285 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8286 {
8287 /*
8288 * When running single-threaded TCG code, use the helper to ensure that
8289 * the next round-robin scheduled vCPU gets a crack. When running in
8290 * MTTCG we don't generate jumps to the helper as it won't affect the
8291 * scheduling of other vCPUs.
8292 */
8293 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8294 gen_set_pc_im(s, s->base.pc_next);
8295 s->base.is_jmp = DISAS_YIELD;
8296 }
8297 return true;
8298 }
8299
8300 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8301 {
8302 /*
8303 * When running single-threaded TCG code, use the helper to ensure that
8304 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8305 * just skip this instruction. Currently the SEV/SEVL instructions,
8306 * which are only *one* of the many ways to wake the CPU from WFE, are
8307 * not implemented, so we can't sleep like WFI does.
8308 */
8309 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8310 gen_set_pc_im(s, s->base.pc_next);
8311 s->base.is_jmp = DISAS_WFE;
8312 }
8313 return true;
8314 }
8315
8316 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8317 {
8318 /* For WFI, halt the vCPU until an IRQ. */
8319 gen_set_pc_im(s, s->base.pc_next);
8320 s->base.is_jmp = DISAS_WFI;
8321 return true;
8322 }
8323
8324 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8325 {
8326 return true;
8327 }
8328
8329 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8330 {
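/*
 * The A32 modified-immediate rule: an 8-bit immediate rotated right
 * by twice the 4-bit rotate field; a->rot holds the raw field here,
 * hence the * 2.
 */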
8331 uint32_t val = ror32(a->imm, a->rot * 2);
8332 uint32_t mask = msr_mask(s, a->mask, a->r);
8333
8334 if (gen_set_psr_im(s, mask, a->r, val)) {
8335 unallocated_encoding(s);
8336 }
8337 return true;
8338 }
8339
8340 /*
8341 * Cyclic Redundancy Check
8342 */
8343
8344 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8345 {
8346 TCGv_i32 t1, t2, t3;
8347
8348 if (!dc_isar_feature(aa32_crc32, s)) {
8349 return false;
8350 }
8351
8352 t1 = load_reg(s, a->rn);
8353 t2 = load_reg(s, a->rm);
8354 switch (sz) {
8355 case MO_8:
8356 gen_uxtb(t2);
8357 break;
8358 case MO_16:
8359 gen_uxth(t2);
8360 break;
8361 case MO_32:
8362 break;
8363 default:
8364 g_assert_not_reached();
8365 }
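/* The helpers take the operand size in bytes: 1 << MO_8/MO_16/MO_32 is 1/2/4. */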
8366 t3 = tcg_const_i32(1 << sz);
8367 if (c) {
8368 gen_helper_crc32c(t1, t1, t2, t3);
8369 } else {
8370 gen_helper_crc32(t1, t1, t2, t3);
8371 }
8372 tcg_temp_free_i32(t2);
8373 tcg_temp_free_i32(t3);
8374 store_reg(s, a->rd, t1);
8375 return true;
8376 }
8377
8378 #define DO_CRC32(NAME, c, sz) \
8379 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8380 { return op_crc32(s, a, c, sz); }
8381
8382 DO_CRC32(CRC32B, false, MO_8)
8383 DO_CRC32(CRC32H, false, MO_16)
8384 DO_CRC32(CRC32W, false, MO_32)
8385 DO_CRC32(CRC32CB, true, MO_8)
8386 DO_CRC32(CRC32CH, true, MO_16)
8387 DO_CRC32(CRC32CW, true, MO_32)
8388
8389 #undef DO_CRC32
8390
8391 /*
8392 * Miscellaneous instructions
8393 */
8394
8395 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8396 {
8397 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8398 return false;
8399 }
8400 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8401 return true;
8402 }
8403
8404 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8405 {
8406 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8407 return false;
8408 }
8409 gen_msr_banked(s, a->r, a->sysm, a->rn);
8410 return true;
8411 }
8412
8413 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8414 {
8415 TCGv_i32 tmp;
8416
8417 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8418 return false;
8419 }
8420 if (a->r) {
8421 if (IS_USER(s)) {
8422 unallocated_encoding(s);
8423 return true;
8424 }
8425 tmp = load_cpu_field(spsr);
8426 } else {
8427 tmp = tcg_temp_new_i32();
8428 gen_helper_cpsr_read(tmp, cpu_env);
8429 }
8430 store_reg(s, a->rd, tmp);
8431 return true;
8432 }
8433
8434 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8435 {
8436 TCGv_i32 tmp;
8437 uint32_t mask = msr_mask(s, a->mask, a->r);
8438
8439 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8440 return false;
8441 }
8442 tmp = load_reg(s, a->rn);
8443 if (gen_set_psr(s, mask, a->r, tmp)) {
8444 unallocated_encoding(s);
8445 }
8446 return true;
8447 }
8448
8449 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8450 {
8451 TCGv_i32 tmp;
8452
8453 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8454 return false;
8455 }
8456 tmp = tcg_const_i32(a->sysm);
8457 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8458 store_reg(s, a->rd, tmp);
8459 return true;
8460 }
8461
8462 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8463 {
8464 TCGv_i32 addr, reg;
8465
8466 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8467 return false;
8468 }
8469 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8470 reg = load_reg(s, a->rn);
8471 gen_helper_v7m_msr(cpu_env, addr, reg);
8472 tcg_temp_free_i32(addr);
8473 tcg_temp_free_i32(reg);
8474 /* If we wrote to CONTROL, the EL might have changed */
8475 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8476 gen_lookup_tb(s);
8477 return true;
8478 }
8479
8480 static bool trans_BX(DisasContext *s, arg_BX *a)
8481 {
8482 if (!ENABLE_ARCH_4T) {
8483 return false;
8484 }
8485 gen_bx_excret(s, load_reg(s, a->rm));
8486 return true;
8487 }
8488
8489 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8490 {
8491 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8492 return false;
8493 }
8494 /* Trivial implementation equivalent to bx. */
8495 gen_bx(s, load_reg(s, a->rm));
8496 return true;
8497 }
8498
8499 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8500 {
8501 TCGv_i32 tmp;
8502
8503 if (!ENABLE_ARCH_5) {
8504 return false;
8505 }
8506 tmp = load_reg(s, a->rm);
8507 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8508 gen_bx(s, tmp);
8509 return true;
8510 }
8511
8512 /*
8513 * BXNS/BLXNS: only exist for v8M with the security extensions,
8514 * and always UNDEF if NonSecure. We don't implement these in
8515 * user-only mode either (in theory you can use them from
8516 * Secure User mode, but they are too closely tied to system emulation).
8517 */
8518 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8519 {
8520 if (!s->v8m_secure || IS_USER_ONLY) {
8521 unallocated_encoding(s);
8522 } else {
8523 gen_bxns(s, a->rm);
8524 }
8525 return true;
8526 }
8527
8528 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8529 {
8530 if (!s->v8m_secure || IS_USER_ONLY) {
8531 unallocated_encoding(s);
8532 } else {
8533 gen_blxns(s, a->rm);
8534 }
8535 return true;
8536 }
8537
8538 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8539 {
8540 TCGv_i32 tmp;
8541
8542 if (!ENABLE_ARCH_5) {
8543 return false;
8544 }
8545 tmp = load_reg(s, a->rm);
8546 tcg_gen_clzi_i32(tmp, tmp, 32);
8547 store_reg(s, a->rd, tmp);
8548 return true;
8549 }
8550
8551 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8552 {
8553 TCGv_i32 tmp;
8554
8555 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8556 return false;
8557 }
8558 if (IS_USER(s)) {
8559 unallocated_encoding(s);
8560 return true;
8561 }
8562 if (s->current_el == 2) {
8563 /* ERET from Hyp uses ELR_Hyp, not LR */
8564 tmp = load_cpu_field(elr_el[2]);
8565 } else {
8566 tmp = load_reg(s, 14);
8567 }
8568 gen_exception_return(s, tmp);
8569 return true;
8570 }
8571
8572 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8573 {
8574 gen_hlt(s, a->imm);
8575 return true;
8576 }
8577
8578 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8579 {
8580 if (!ENABLE_ARCH_5) {
8581 return false;
8582 }
8583 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8584 semihosting_enabled() &&
8585 #ifndef CONFIG_USER_ONLY
8586 !IS_USER(s) &&
8587 #endif
8588 (a->imm == 0xab)) {
8589 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8590 } else {
8591 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8592 }
8593 return true;
8594 }
8595
8596 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8597 {
8598 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8599 return false;
8600 }
8601 if (IS_USER(s)) {
8602 unallocated_encoding(s);
8603 } else {
8604 gen_hvc(s, a->imm);
8605 }
8606 return true;
8607 }
8608
8609 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8610 {
8611 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8612 return false;
8613 }
8614 if (IS_USER(s)) {
8615 unallocated_encoding(s);
8616 } else {
8617 gen_smc(s);
8618 }
8619 return true;
8620 }
8621
8622 static bool trans_SG(DisasContext *s, arg_SG *a)
8623 {
8624 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8625 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8626 return false;
8627 }
8628 /*
8629 * SG (v8M only)
8630 * The bulk of the behaviour for this instruction is implemented
8631 * in v7m_handle_execute_nsc(), which deals with the insn when
8632 * it is executed by a CPU in non-secure state from memory
8633 * which is Secure & NonSecure-Callable.
8634 * Here we only need to handle the remaining cases:
8635 * * in NS memory (including the "security extension not
8636 * implemented" case) : NOP
8637 * * in S memory but CPU already secure (clear IT bits)
8638 * We know that the attribute for the memory this insn is
8639 * in must match the current CPU state, because otherwise
8640 * get_phys_addr_pmsav8 would have generated an exception.
8641 */
8642 if (s->v8m_secure) {
8643 /* Like the IT insn, we don't need to generate any code */
8644 s->condexec_cond = 0;
8645 s->condexec_mask = 0;
8646 }
8647 return true;
8648 }
8649
8650 static bool trans_TT(DisasContext *s, arg_TT *a)
8651 {
8652 TCGv_i32 addr, tmp;
8653
8654 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8655 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8656 return false;
8657 }
8658 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8659 /* We UNDEF for these UNPREDICTABLE cases */
8660 unallocated_encoding(s);
8661 return true;
8662 }
8663 if (a->A && !s->v8m_secure) {
8664 /* This case is UNDEFINED. */
8665 unallocated_encoding(s);
8666 return true;
8667 }
8668
8669 addr = load_reg(s, a->rn);
8670 tmp = tcg_const_i32((a->A << 1) | a->T);
8671 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8672 tcg_temp_free_i32(addr);
8673 store_reg(s, a->rd, tmp);
8674 return true;
8675 }
8676
8677 /*
8678 * Load/store register index
8679 */
8680
8681 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8682 {
8683 ISSInfo ret;
8684
8685 /* ISS not valid if writeback */
8686 if (p && !w) {
8687 ret = rd;
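/* A 2-byte insn length means this was a 16-bit Thumb encoding. */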
8688 if (s->base.pc_next - s->pc_curr == 2) {
8689 ret |= ISSIs16Bit;
8690 }
8691 } else {
8692 ret = ISSInvalid;
8693 }
8694 return ret;
8695 }
8696
8697 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8698 {
8699 TCGv_i32 addr = load_reg(s, a->rn);
8700
8701 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8702 gen_helper_v8m_stackcheck(cpu_env, addr);
8703 }
8704
8705 if (a->p) {
8706 TCGv_i32 ofs = load_reg(s, a->rm);
8707 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8708 if (a->u) {
8709 tcg_gen_add_i32(addr, addr, ofs);
8710 } else {
8711 tcg_gen_sub_i32(addr, addr, ofs);
8712 }
8713 tcg_temp_free_i32(ofs);
8714 }
8715 return addr;
8716 }
8717
8718 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8719 TCGv_i32 addr, int address_offset)
8720 {
8721 if (!a->p) {
8722 TCGv_i32 ofs = load_reg(s, a->rm);
8723 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8724 if (a->u) {
8725 tcg_gen_add_i32(addr, addr, ofs);
8726 } else {
8727 tcg_gen_sub_i32(addr, addr, ofs);
8728 }
8729 tcg_temp_free_i32(ofs);
8730 } else if (!a->w) {
8731 tcg_temp_free_i32(addr);
8732 return;
8733 }
8734 tcg_gen_addi_i32(addr, addr, address_offset);
8735 store_reg(s, a->rn, addr);
8736 }
8737
8738 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8739 MemOp mop, int mem_idx)
8740 {
8741 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8742 TCGv_i32 addr, tmp;
8743
8744 addr = op_addr_rr_pre(s, a);
8745
8746 tmp = tcg_temp_new_i32();
8747 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8748 disas_set_da_iss(s, mop, issinfo);
8749
8750 /*
8751 * Perform base writeback before the loaded value to
8752 * ensure correct behavior with overlapping index registers.
8753 */
8754 op_addr_rr_post(s, a, addr, 0);
8755 store_reg_from_load(s, a->rt, tmp);
8756 return true;
8757 }
8758
8759 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8760 MemOp mop, int mem_idx)
8761 {
8762 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8763 TCGv_i32 addr, tmp;
8764
8765 addr = op_addr_rr_pre(s, a);
8766
8767 tmp = load_reg(s, a->rt);
8768 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8769 disas_set_da_iss(s, mop, issinfo);
8770 tcg_temp_free_i32(tmp);
8771
8772 op_addr_rr_post(s, a, addr, 0);
8773 return true;
8774 }
8775
8776 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8777 {
8778 int mem_idx = get_mem_index(s);
8779 TCGv_i32 addr, tmp;
8780
8781 if (!ENABLE_ARCH_5TE) {
8782 return false;
8783 }
8784 if (a->rt & 1) {
8785 unallocated_encoding(s);
8786 return true;
8787 }
8788 addr = op_addr_rr_pre(s, a);
8789
8790 tmp = tcg_temp_new_i32();
8791 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8792 store_reg(s, a->rt, tmp);
8793
8794 tcg_gen_addi_i32(addr, addr, 4);
8795
8796 tmp = tcg_temp_new_i32();
8797 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8798 store_reg(s, a->rt + 1, tmp);
8799
8800 /* LDRD w/ base writeback is undefined if the registers overlap. */
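/*
 * addr currently points at the second word; the -4 backs it off so
 * that any writeback is based on the address of the first word.
 */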
8801 op_addr_rr_post(s, a, addr, -4);
8802 return true;
8803 }
8804
8805 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8806 {
8807 int mem_idx = get_mem_index(s);
8808 TCGv_i32 addr, tmp;
8809
8810 if (!ENABLE_ARCH_5TE) {
8811 return false;
8812 }
8813 if (a->rt & 1) {
8814 unallocated_encoding(s);
8815 return true;
8816 }
8817 addr = op_addr_rr_pre(s, a);
8818
8819 tmp = load_reg(s, a->rt);
8820 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8821 tcg_temp_free_i32(tmp);
8822
8823 tcg_gen_addi_i32(addr, addr, 4);
8824
8825 tmp = load_reg(s, a->rt + 1);
8826 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8827 tcg_temp_free_i32(tmp);
8828
8829 op_addr_rr_post(s, a, addr, -4);
8830 return true;
8831 }
8832
8833 /*
8834 * Load/store immediate index
8835 */
8836
8837 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8838 {
8839 int ofs = a->imm;
8840
8841 if (!a->u) {
8842 ofs = -ofs;
8843 }
8844
8845 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8846 /*
8847 * Stackcheck. Here we know 'addr' is the current SP;
8848 * if U is set we're moving SP up, else down. It is
8849 * UNKNOWN whether the limit check triggers when SP starts
8850 * below the limit and ends up above it; we choose to do so.
8851 */
8852 if (!a->u) {
8853 TCGv_i32 newsp = tcg_temp_new_i32();
8854 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8855 gen_helper_v8m_stackcheck(cpu_env, newsp);
8856 tcg_temp_free_i32(newsp);
8857 } else {
8858 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8859 }
8860 }
8861
8862 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8863 }
8864
8865 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8866 TCGv_i32 addr, int address_offset)
8867 {
8868 if (!a->p) {
8869 if (a->u) {
8870 address_offset += a->imm;
8871 } else {
8872 address_offset -= a->imm;
8873 }
8874 } else if (!a->w) {
8875 tcg_temp_free_i32(addr);
8876 return;
8877 }
8878 tcg_gen_addi_i32(addr, addr, address_offset);
8879 store_reg(s, a->rn, addr);
8880 }
8881
8882 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8883 MemOp mop, int mem_idx)
8884 {
8885 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8886 TCGv_i32 addr, tmp;
8887
8888 addr = op_addr_ri_pre(s, a);
8889
8890 tmp = tcg_temp_new_i32();
8891 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8892 disas_set_da_iss(s, mop, issinfo);
8893
8894 /*
8895 * Perform base writeback before the loaded value to
8896 * ensure correct behavior with overlapping index registers.
8897 */
8898 op_addr_ri_post(s, a, addr, 0);
8899 store_reg_from_load(s, a->rt, tmp);
8900 return true;
8901 }
8902
8903 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8904 MemOp mop, int mem_idx)
8905 {
8906 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8907 TCGv_i32 addr, tmp;
8908
8909 addr = op_addr_ri_pre(s, a);
8910
8911 tmp = load_reg(s, a->rt);
8912 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8913 disas_set_da_iss(s, mop, issinfo);
8914 tcg_temp_free_i32(tmp);
8915
8916 op_addr_ri_post(s, a, addr, 0);
8917 return true;
8918 }
8919
8920 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8921 {
8922 int mem_idx = get_mem_index(s);
8923 TCGv_i32 addr, tmp;
8924
8925 addr = op_addr_ri_pre(s, a);
8926
8927 tmp = tcg_temp_new_i32();
8928 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8929 store_reg(s, a->rt, tmp);
8930
8931 tcg_gen_addi_i32(addr, addr, 4);
8932
8933 tmp = tcg_temp_new_i32();
8934 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8935 store_reg(s, rt2, tmp);
8936
8937 /* LDRD w/ base writeback is undefined if the registers overlap. */
8938 op_addr_ri_post(s, a, addr, -4);
8939 return true;
8940 }
8941
8942 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8943 {
8944 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8945 return false;
8946 }
8947 return op_ldrd_ri(s, a, a->rt + 1);
8948 }
8949
8950 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8951 {
8952 arg_ldst_ri b = {
8953 .u = a->u, .w = a->w, .p = a->p,
8954 .rn = a->rn, .rt = a->rt, .imm = a->imm
8955 };
8956 return op_ldrd_ri(s, &b, a->rt2);
8957 }
8958
8959 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8960 {
8961 int mem_idx = get_mem_index(s);
8962 TCGv_i32 addr, tmp;
8963
8964 addr = op_addr_ri_pre(s, a);
8965
8966 tmp = load_reg(s, a->rt);
8967 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8968 tcg_temp_free_i32(tmp);
8969
8970 tcg_gen_addi_i32(addr, addr, 4);
8971
8972 tmp = load_reg(s, rt2);
8973 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8974 tcg_temp_free_i32(tmp);
8975
8976 op_addr_ri_post(s, a, addr, -4);
8977 return true;
8978 }
8979
8980 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8981 {
8982 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8983 return false;
8984 }
8985 return op_strd_ri(s, a, a->rt + 1);
8986 }
8987
8988 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8989 {
8990 arg_ldst_ri b = {
8991 .u = a->u, .w = a->w, .p = a->p,
8992 .rn = a->rn, .rt = a->rt, .imm = a->imm
8993 };
8994 return op_strd_ri(s, &b, a->rt2);
8995 }
8996
8997 #define DO_LDST(NAME, WHICH, MEMOP) \
8998 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8999 { \
9000 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9001 } \
9002 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9003 { \
9004 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9005 } \
9006 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9007 { \
9008 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9009 } \
9010 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9011 { \
9012 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9013 }
9014
9015 DO_LDST(LDR, load, MO_UL)
9016 DO_LDST(LDRB, load, MO_UB)
9017 DO_LDST(LDRH, load, MO_UW)
9018 DO_LDST(LDRSB, load, MO_SB)
9019 DO_LDST(LDRSH, load, MO_SW)
9020
9021 DO_LDST(STR, store, MO_UL)
9022 DO_LDST(STRB, store, MO_UB)
9023 DO_LDST(STRH, store, MO_UW)
9024
9025 #undef DO_LDST
9026
9027 /*
9028 * Synchronization primitives
9029 */
9030
9031 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9032 {
9033 TCGv_i32 addr, tmp;
9034 TCGv taddr;
9035
9036 opc |= s->be_data;
9037 addr = load_reg(s, a->rn);
9038 taddr = gen_aa32_addr(s, addr, opc);
9039 tcg_temp_free_i32(addr);
9040
9041 tmp = load_reg(s, a->rt2);
9042 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9043 tcg_temp_free(taddr);
9044
9045 store_reg(s, a->rt, tmp);
9046 return true;
9047 }
9048
9049 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9050 {
9051 return op_swp(s, a, MO_UL | MO_ALIGN);
9052 }
9053
9054 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9055 {
9056 return op_swp(s, a, MO_UB);
9057 }
9058
9059 /*
9060 * Load/Store Exclusive and Load-Acquire/Store-Release
9061 */
9062
9063 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9064 {
9065 TCGv_i32 addr;
9066 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9067 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9068
9069 /* We UNDEF for these UNPREDICTABLE cases. */
9070 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9071 || a->rd == a->rn || a->rd == a->rt
9072 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9073 || (mop == MO_64
9074 && (a->rt2 == 15
9075 || a->rd == a->rt2
9076 || (!v8a && s->thumb && a->rt2 == 13)))) {
9077 unallocated_encoding(s);
9078 return true;
9079 }
9080
9081 if (rel) {
9082 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9083 }
9084
9085 addr = tcg_temp_local_new_i32();
9086 load_reg_var(s, addr, a->rn);
9087 tcg_gen_addi_i32(addr, addr, a->imm);
9088
9089 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9090 tcg_temp_free_i32(addr);
9091 return true;
9092 }
9093
9094 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9095 {
9096 if (!ENABLE_ARCH_6) {
9097 return false;
9098 }
9099 return op_strex(s, a, MO_32, false);
9100 }
9101
9102 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9103 {
9104 if (!ENABLE_ARCH_6K) {
9105 return false;
9106 }
9107 /* We UNDEF for these UNPREDICTABLE cases. */
9108 if (a->rt & 1) {
9109 unallocated_encoding(s);
9110 return true;
9111 }
9112 a->rt2 = a->rt + 1;
9113 return op_strex(s, a, MO_64, false);
9114 }
9115
9116 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9117 {
9118 return op_strex(s, a, MO_64, false);
9119 }
9120
9121 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9122 {
9123 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9124 return false;
9125 }
9126 return op_strex(s, a, MO_8, false);
9127 }
9128
9129 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9130 {
9131 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9132 return false;
9133 }
9134 return op_strex(s, a, MO_16, false);
9135 }
9136
9137 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9138 {
9139 if (!ENABLE_ARCH_8) {
9140 return false;
9141 }
9142 return op_strex(s, a, MO_32, true);
9143 }
9144
9145 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9146 {
9147 if (!ENABLE_ARCH_8) {
9148 return false;
9149 }
9150 /* We UNDEF for these UNPREDICTABLE cases. */
9151 if (a->rt & 1) {
9152 unallocated_encoding(s);
9153 return true;
9154 }
9155 a->rt2 = a->rt + 1;
9156 return op_strex(s, a, MO_64, true);
9157 }
9158
9159 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9160 {
9161 if (!ENABLE_ARCH_8) {
9162 return false;
9163 }
9164 return op_strex(s, a, MO_64, true);
9165 }
9166
9167 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9168 {
9169 if (!ENABLE_ARCH_8) {
9170 return false;
9171 }
9172 return op_strex(s, a, MO_8, true);
9173 }
9174
9175 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9176 {
9177 if (!ENABLE_ARCH_8) {
9178 return false;
9179 }
9180 return op_strex(s, a, MO_16, true);
9181 }
9182
9183 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9184 {
9185 TCGv_i32 addr, tmp;
9186
9187 if (!ENABLE_ARCH_8) {
9188 return false;
9189 }
9190 /* We UNDEF for these UNPREDICTABLE cases. */
9191 if (a->rn == 15 || a->rt == 15) {
9192 unallocated_encoding(s);
9193 return true;
9194 }
9195
9196 addr = load_reg(s, a->rn);
9197 tmp = load_reg(s, a->rt);
9198 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9199 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9200 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9201
9202 tcg_temp_free_i32(tmp);
9203 tcg_temp_free_i32(addr);
9204 return true;
9205 }
9206
9207 static bool trans_STL(DisasContext *s, arg_STL *a)
9208 {
9209 return op_stl(s, a, MO_UL);
9210 }
9211
9212 static bool trans_STLB(DisasContext *s, arg_STL *a)
9213 {
9214 return op_stl(s, a, MO_UB);
9215 }
9216
9217 static bool trans_STLH(DisasContext *s, arg_STL *a)
9218 {
9219 return op_stl(s, a, MO_UW);
9220 }
9221
9222 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9223 {
9224 TCGv_i32 addr;
9225 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9226 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9227
9228 /* We UNDEF for these UNPREDICTABLE cases. */
9229 if (a->rn == 15 || a->rt == 15
9230 || (!v8a && s->thumb && a->rt == 13)
9231 || (mop == MO_64
9232 && (a->rt2 == 15 || a->rt == a->rt2
9233 || (!v8a && s->thumb && a->rt2 == 13)))) {
9234 unallocated_encoding(s);
9235 return true;
9236 }
9237
9238 addr = tcg_temp_local_new_i32();
9239 load_reg_var(s, addr, a->rn);
9240 tcg_gen_addi_i32(addr, addr, a->imm);
9241
9242 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9243 tcg_temp_free_i32(addr);
9244
9245 if (acq) {
9246 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9247 }
9248 return true;
9249 }
9250
9251 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9252 {
9253 if (!ENABLE_ARCH_6) {
9254 return false;
9255 }
9256 return op_ldrex(s, a, MO_32, false);
9257 }
9258
9259 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9260 {
9261 if (!ENABLE_ARCH_6K) {
9262 return false;
9263 }
9264 /* We UNDEF for these UNPREDICTABLE cases. */
9265 if (a->rt & 1) {
9266 unallocated_encoding(s);
9267 return true;
9268 }
9269 a->rt2 = a->rt + 1;
9270 return op_ldrex(s, a, MO_64, false);
9271 }
9272
9273 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9274 {
9275 return op_ldrex(s, a, MO_64, false);
9276 }
9277
9278 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9279 {
9280 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9281 return false;
9282 }
9283 return op_ldrex(s, a, MO_8, false);
9284 }
9285
9286 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9287 {
9288 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9289 return false;
9290 }
9291 return op_ldrex(s, a, MO_16, false);
9292 }
9293
9294 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9295 {
9296 if (!ENABLE_ARCH_8) {
9297 return false;
9298 }
9299 return op_ldrex(s, a, MO_32, true);
9300 }
9301
9302 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9303 {
9304 if (!ENABLE_ARCH_8) {
9305 return false;
9306 }
9307 /* We UNDEF for these UNPREDICTABLE cases. */
9308 if (a->rt & 1) {
9309 unallocated_encoding(s);
9310 return true;
9311 }
9312 a->rt2 = a->rt + 1;
9313 return op_ldrex(s, a, MO_64, true);
9314 }
9315
9316 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9317 {
9318 if (!ENABLE_ARCH_8) {
9319 return false;
9320 }
9321 return op_ldrex(s, a, MO_64, true);
9322 }
9323
9324 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9325 {
9326 if (!ENABLE_ARCH_8) {
9327 return false;
9328 }
9329 return op_ldrex(s, a, MO_8, true);
9330 }
9331
9332 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9333 {
9334 if (!ENABLE_ARCH_8) {
9335 return false;
9336 }
9337 return op_ldrex(s, a, MO_16, true);
9338 }
9339
9340 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9341 {
9342 TCGv_i32 addr, tmp;
9343
9344 if (!ENABLE_ARCH_8) {
9345 return false;
9346 }
9347 /* We UNDEF for these UNPREDICTABLE cases. */
9348 if (a->rn == 15 || a->rt == 15) {
9349 unallocated_encoding(s);
9350 return true;
9351 }
9352
9353 addr = load_reg(s, a->rn);
9354 tmp = tcg_temp_new_i32();
9355 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9356 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9357 tcg_temp_free_i32(addr);
9358
9359 store_reg(s, a->rt, tmp);
9360 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9361 return true;
9362 }
9363
9364 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9365 {
9366 return op_lda(s, a, MO_UL);
9367 }
9368
9369 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9370 {
9371 return op_lda(s, a, MO_UB);
9372 }
9373
9374 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9375 {
9376 return op_lda(s, a, MO_UW);
9377 }
9378
9379 /*
9380 * Media instructions
9381 */
9382
9383 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9384 {
9385 TCGv_i32 t1, t2;
9386
9387 if (!ENABLE_ARCH_6) {
9388 return false;
9389 }
9390
9391 t1 = load_reg(s, a->rn);
9392 t2 = load_reg(s, a->rm);
9393 gen_helper_usad8(t1, t1, t2);
9394 tcg_temp_free_i32(t2);
9395 if (a->ra != 15) {
9396 t2 = load_reg(s, a->ra);
9397 tcg_gen_add_i32(t1, t1, t2);
9398 tcg_temp_free_i32(t2);
9399 }
9400 store_reg(s, a->rd, t1);
9401 return true;
9402 }
9403
9404 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9405 {
9406 TCGv_i32 tmp;
9407 int width = a->widthm1 + 1;
9408 int shift = a->lsb;
9409
9410 if (!ENABLE_ARCH_6T2) {
9411 return false;
9412 }
9413 if (shift + width > 32) {
9414 /* UNPREDICTABLE; we choose to UNDEF */
9415 unallocated_encoding(s);
9416 return true;
9417 }
9418
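/*
 * e.g. lsb == 4 and widthm1 == 7 copies bits <11:4> of Rn into
 * Rd<7:0>, zero-extended for UBFX and sign-extended for SBFX.
 */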
9419 tmp = load_reg(s, a->rn);
9420 if (u) {
9421 tcg_gen_extract_i32(tmp, tmp, shift, width);
9422 } else {
9423 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9424 }
9425 store_reg(s, a->rd, tmp);
9426 return true;
9427 }
9428
9429 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9430 {
9431 return op_bfx(s, a, false);
9432 }
9433
9434 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9435 {
9436 return op_bfx(s, a, true);
9437 }
9438
9439 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9440 {
9441 TCGv_i32 tmp;
9442 int msb = a->msb, lsb = a->lsb;
9443 int width;
9444
9445 if (!ENABLE_ARCH_6T2) {
9446 return false;
9447 }
9448 if (msb < lsb) {
9449 /* UNPREDICTABLE; we choose to UNDEF */
9450 unallocated_encoding(s);
9451 return true;
9452 }
9453
9454 width = msb + 1 - lsb;
9455 if (a->rn == 15) {
9456 /* BFC */
9457 tmp = tcg_const_i32(0);
9458 } else {
9459 /* BFI */
9460 tmp = load_reg(s, a->rn);
9461 }
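/* A 32-bit wide field replaces Rd entirely, so no deposit is needed. */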
9462 if (width != 32) {
9463 TCGv_i32 tmp2 = load_reg(s, a->rd);
9464 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9465 tcg_temp_free_i32(tmp2);
9466 }
9467 store_reg(s, a->rd, tmp);
9468 return true;
9469 }
9470
9471 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9472 {
9473 unallocated_encoding(s);
9474 return true;
9475 }
9476
9477 /*
9478 * Parallel addition and subtraction
9479 */
9480
9481 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9482 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9483 {
9484 TCGv_i32 t0, t1;
9485
9486 if (s->thumb
9487 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9488 : !ENABLE_ARCH_6) {
9489 return false;
9490 }
9491
9492 t0 = load_reg(s, a->rn);
9493 t1 = load_reg(s, a->rm);
9494
9495 gen(t0, t0, t1);
9496
9497 tcg_temp_free_i32(t1);
9498 store_reg(s, a->rd, t0);
9499 return true;
9500 }
9501
9502 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9503 void (*gen)(TCGv_i32, TCGv_i32,
9504 TCGv_i32, TCGv_ptr))
9505 {
9506 TCGv_i32 t0, t1;
9507 TCGv_ptr ge;
9508
9509 if (s->thumb
9510 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9511 : !ENABLE_ARCH_6) {
9512 return false;
9513 }
9514
9515 t0 = load_reg(s, a->rn);
9516 t1 = load_reg(s, a->rm);
9517
9518 ge = tcg_temp_new_ptr();
9519 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9520 gen(t0, t0, t1, ge);
9521
9522 tcg_temp_free_ptr(ge);
9523 tcg_temp_free_i32(t1);
9524 store_reg(s, a->rd, t0);
9525 return true;
9526 }
9527
9528 #define DO_PAR_ADDSUB(NAME, helper) \
9529 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9530 { \
9531 return op_par_addsub(s, a, helper); \
9532 }
9533
9534 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9535 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9536 { \
9537 return op_par_addsub_ge(s, a, helper); \
9538 }
9539
9540 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9541 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9542 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9543 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9544 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9545 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9546
9547 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9548 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9549 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9550 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9551 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9552 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9553
9554 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9555 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9556 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9557 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9558 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9559 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9560
9561 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9562 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9563 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9564 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9565 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9566 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9567
9568 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9569 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9570 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9571 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9572 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9573 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9574
9575 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9576 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9577 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9578 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9579 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9580 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9581
9582 #undef DO_PAR_ADDSUB
9583 #undef DO_PAR_ADDSUB_GE
9584
9585 /*
9586 * Packing, unpacking, saturation, and reversal
9587 */
9588
9589 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9590 {
9591 TCGv_i32 tn, tm;
9592 int shift = a->imm;
9593
9594 if (s->thumb
9595 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9596 : !ENABLE_ARCH_6) {
9597 return false;
9598 }
9599
9600 tn = load_reg(s, a->rn);
9601 tm = load_reg(s, a->rm);
9602 if (a->tb) {
9603 /* PKHTB */
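/*
 * An immediate of 0 encodes ASR #32; ASR #31 gives the same
 * all-sign-bits value, and keeps the TCG shift count in range.
 */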
9604 if (shift == 0) {
9605 shift = 31;
9606 }
9607 tcg_gen_sari_i32(tm, tm, shift);
9608 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9609 } else {
9610 /* PKHBT */
9611 tcg_gen_shli_i32(tm, tm, shift);
9612 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9613 }
9614 tcg_temp_free_i32(tm);
9615 store_reg(s, a->rd, tn);
9616 return true;
9617 }
9618
9619 static bool op_sat(DisasContext *s, arg_sat *a,
9620 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9621 {
9622 TCGv_i32 tmp, satimm;
9623 int shift = a->imm;
9624
9625 if (!ENABLE_ARCH_6) {
9626 return false;
9627 }
9628
9629 tmp = load_reg(s, a->rn);
9630 if (a->sh) {
9631 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9632 } else {
9633 tcg_gen_shli_i32(tmp, tmp, shift);
9634 }
9635
9636 satimm = tcg_const_i32(a->satimm);
9637 gen(tmp, cpu_env, tmp, satimm);
9638 tcg_temp_free_i32(satimm);
9639
9640 store_reg(s, a->rd, tmp);
9641 return true;
9642 }
9643
9644 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9645 {
9646 return op_sat(s, a, gen_helper_ssat);
9647 }
9648
9649 static bool trans_USAT(DisasContext *s, arg_sat *a)
9650 {
9651 return op_sat(s, a, gen_helper_usat);
9652 }
9653
9654 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9655 {
9656 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9657 return false;
9658 }
9659 return op_sat(s, a, gen_helper_ssat16);
9660 }
9661
9662 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9663 {
9664 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9665 return false;
9666 }
9667 return op_sat(s, a, gen_helper_usat16);
9668 }
9669
9670 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9671 void (*gen_extract)(TCGv_i32, TCGv_i32),
9672 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9673 {
9674 TCGv_i32 tmp;
9675
9676 if (!ENABLE_ARCH_6) {
9677 return false;
9678 }
9679
9680 tmp = load_reg(s, a->rm);
9681 /*
9682 * TODO: In many cases we could do a shift instead of a rotate.
9683 * Combined with a simple extend, that becomes an extract.
9684 */
9685 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9686 gen_extract(tmp, tmp);
9687
9688 if (a->rn != 15) {
9689 TCGv_i32 tmp2 = load_reg(s, a->rn);
9690 gen_add(tmp, tmp, tmp2);
9691 tcg_temp_free_i32(tmp2);
9692 }
9693 store_reg(s, a->rd, tmp);
9694 return true;
9695 }
9696
9697 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9698 {
9699 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9700 }
9701
9702 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9703 {
9704 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9705 }
9706
9707 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9708 {
9709 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9710 return false;
9711 }
9712 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9713 }
9714
9715 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9716 {
9717 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9718 }
9719
9720 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9721 {
9722 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9723 }
9724
9725 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9726 {
9727 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9728 return false;
9729 }
9730 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9731 }
9732
9733 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9734 {
9735 TCGv_i32 t1, t2, t3;
9736
9737 if (s->thumb
9738 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9739 : !ENABLE_ARCH_6) {
9740 return false;
9741 }
9742
9743 t1 = load_reg(s, a->rn);
9744 t2 = load_reg(s, a->rm);
9745 t3 = tcg_temp_new_i32();
9746 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9747 gen_helper_sel_flags(t1, t3, t1, t2);
9748 tcg_temp_free_i32(t3);
9749 tcg_temp_free_i32(t2);
9750 store_reg(s, a->rd, t1);
9751 return true;
9752 }
9753
9754 static bool op_rr(DisasContext *s, arg_rr *a,
9755 void (*gen)(TCGv_i32, TCGv_i32))
9756 {
9757 TCGv_i32 tmp;
9758
9759 tmp = load_reg(s, a->rm);
9760 gen(tmp, tmp);
9761 store_reg(s, a->rd, tmp);
9762 return true;
9763 }
9764
9765 static bool trans_REV(DisasContext *s, arg_rr *a)
9766 {
9767 if (!ENABLE_ARCH_6) {
9768 return false;
9769 }
9770 return op_rr(s, a, tcg_gen_bswap32_i32);
9771 }
9772
9773 static bool trans_REV16(DisasContext *s, arg_rr *a)
9774 {
9775 if (!ENABLE_ARCH_6) {
9776 return false;
9777 }
9778 return op_rr(s, a, gen_rev16);
9779 }
9780
9781 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9782 {
9783 if (!ENABLE_ARCH_6) {
9784 return false;
9785 }
9786 return op_rr(s, a, gen_revsh);
9787 }
9788
9789 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9790 {
9791 if (!ENABLE_ARCH_6T2) {
9792 return false;
9793 }
9794 return op_rr(s, a, gen_helper_rbit);
9795 }
9796
9797 /*
9798 * Signed multiply, signed and unsigned divide
9799 */
9800
9801 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9802 {
9803 TCGv_i32 t1, t2;
9804
9805 if (!ENABLE_ARCH_6) {
9806 return false;
9807 }
9808
9809 t1 = load_reg(s, a->rn);
9810 t2 = load_reg(s, a->rm);
9811 if (m_swap) {
9812 gen_swap_half(t2);
9813 }
9814 gen_smul_dual(t1, t2);
9815
9816 if (sub) {
9817 /* This subtraction cannot overflow. */
9818 tcg_gen_sub_i32(t1, t1, t2);
9819 } else {
9820 /*
9821 * This addition cannot overflow 32 bits; however it may
9822 * overflow considered as a signed operation, in which case
9823 * we must set the Q flag.
9824 */
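/*
 * e.g. (-32768 * -32768) + (-32768 * -32768) == 0x80000000: it fits
 * in 32 bits, but as a signed addition it overflows, so Q is set.
 */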
9825 gen_helper_add_setq(t1, cpu_env, t1, t2);
9826 }
9827 tcg_temp_free_i32(t2);
9828
9829 if (a->ra != 15) {
9830 t2 = load_reg(s, a->ra);
9831 gen_helper_add_setq(t1, cpu_env, t1, t2);
9832 tcg_temp_free_i32(t2);
9833 }
9834 store_reg(s, a->rd, t1);
9835 return true;
9836 }
9837
9838 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9839 {
9840 return op_smlad(s, a, false, false);
9841 }
9842
9843 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9844 {
9845 return op_smlad(s, a, true, false);
9846 }
9847
9848 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9849 {
9850 return op_smlad(s, a, false, true);
9851 }
9852
9853 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9854 {
9855 return op_smlad(s, a, true, true);
9856 }
9857
9858 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9859 {
9860 TCGv_i32 t1, t2;
9861 TCGv_i64 l1, l2;
9862
9863 if (!ENABLE_ARCH_6) {
9864 return false;
9865 }
9866
9867 t1 = load_reg(s, a->rn);
9868 t2 = load_reg(s, a->rm);
9869 if (m_swap) {
9870 gen_swap_half(t2);
9871 }
9872 gen_smul_dual(t1, t2);
9873
9874 l1 = tcg_temp_new_i64();
9875 l2 = tcg_temp_new_i64();
9876 tcg_gen_ext_i32_i64(l1, t1);
9877 tcg_gen_ext_i32_i64(l2, t2);
9878 tcg_temp_free_i32(t1);
9879 tcg_temp_free_i32(t2);
9880
9881 if (sub) {
9882 tcg_gen_sub_i64(l1, l1, l2);
9883 } else {
9884 tcg_gen_add_i64(l1, l1, l2);
9885 }
9886 tcg_temp_free_i64(l2);
9887
9888 gen_addq(s, l1, a->ra, a->rd);
9889 gen_storeq_reg(s, a->ra, a->rd, l1);
9890 tcg_temp_free_i64(l1);
9891 return true;
9892 }
9893
9894 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9895 {
9896 return op_smlald(s, a, false, false);
9897 }
9898
9899 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9900 {
9901 return op_smlald(s, a, true, false);
9902 }
9903
9904 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9905 {
9906 return op_smlald(s, a, false, true);
9907 }
9908
9909 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9910 {
9911 return op_smlald(s, a, true, true);
9912 }
9913
9914 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9915 {
9916 TCGv_i32 t1, t2;
9917
9918 if (s->thumb
9919 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9920 : !ENABLE_ARCH_6) {
9921 return false;
9922 }
9923
9924 t1 = load_reg(s, a->rn);
9925 t2 = load_reg(s, a->rm);
9926 tcg_gen_muls2_i32(t2, t1, t1, t2);
9927
9928 if (a->ra != 15) {
9929 TCGv_i32 t3 = load_reg(s, a->ra);
9930 if (sub) {
9931 /*
9932 * For SMMLS, we need a 64-bit subtract: it yields the borrow
9933 * caused by a non-zero multiplicand lowpart, and the correct
9934 * result lowpart for rounding.
9935 */
9936 TCGv_i32 zero = tcg_const_i32(0);
9937 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9938 tcg_temp_free_i32(zero);
9939 } else {
9940 tcg_gen_add_i32(t1, t1, t3);
9941 }
9942 tcg_temp_free_i32(t3);
9943 }
9944 if (round) {
9945 /*
9946 * Adding 0x80000000 to the 64-bit quantity means that we have
9947 * a carry into the high word when the low word has the msb set.
9948 */
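/*
 * e.g. a low word of 0x80000000 or more contributes +1 to the high
 * word, anything smaller contributes 0.
 */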
9949 tcg_gen_shri_i32(t2, t2, 31);
9950 tcg_gen_add_i32(t1, t1, t2);
9951 }
9952 tcg_temp_free_i32(t2);
9953 store_reg(s, a->rd, t1);
9954 return true;
9955 }
9956
9957 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9958 {
9959 return op_smmla(s, a, false, false);
9960 }
9961
9962 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9963 {
9964 return op_smmla(s, a, true, false);
9965 }
9966
9967 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9968 {
9969 return op_smmla(s, a, false, true);
9970 }
9971
9972 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9973 {
9974 return op_smmla(s, a, true, true);
9975 }
9976
9977 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9978 {
9979 TCGv_i32 t1, t2;
9980
9981 if (s->thumb
9982 ? !dc_isar_feature(aa32_thumb_div, s)
9983 : !dc_isar_feature(aa32_arm_div, s)) {
9984 return false;
9985 }
9986
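/*
 * The special cases are resolved in the helpers: division by zero
 * yields 0 and INT_MIN / -1 yields INT_MIN, as the architecture
 * specifies when no divide-by-zero trap is configured, so no extra
 * checks are generated here.
 */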
9987 t1 = load_reg(s, a->rn);
9988 t2 = load_reg(s, a->rm);
9989 if (u) {
9990 gen_helper_udiv(t1, t1, t2);
9991 } else {
9992 gen_helper_sdiv(t1, t1, t2);
9993 }
9994 tcg_temp_free_i32(t2);
9995 store_reg(s, a->rd, t1);
9996 return true;
9997 }
9998
9999 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10000 {
10001 return op_div(s, a, false);
10002 }
10003
10004 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10005 {
10006 return op_div(s, a, true);
10007 }
10008
10009 /*
10010 * Block data transfer
10011 */
10012
10013 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10014 {
10015 TCGv_i32 addr = load_reg(s, a->rn);
10016
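/*
 * e.g. a decrement-before transfer of n words covers Rn - 4*n .. Rn - 4,
 * so the base is dropped by n*4 up front; a decrement-after transfer
 * covers Rn - 4*(n-1) .. Rn, hence the (n-1)*4 adjustment. The transfer
 * loop itself always walks upwards from 'addr'.
 */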
10017 if (a->b) {
10018 if (a->i) {
10019 /* pre increment */
10020 tcg_gen_addi_i32(addr, addr, 4);
10021 } else {
10022 /* pre decrement */
10023 tcg_gen_addi_i32(addr, addr, -(n * 4));
10024 }
10025 } else if (!a->i && n != 1) {
10026 /* post decrement */
10027 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10028 }
10029
10030 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10031 /*
10032 * If the writeback is incrementing SP rather than
10033 * decrementing it, and the initial SP is below the
10034 * stack limit but the final written-back SP would
10035 * be above, then we must not perform any memory
10036 * accesses, but it is IMPDEF whether we generate
10037 * an exception. We choose to do so in this case.
10038 * At this point 'addr' is the lowest address, so
10039 * either the original SP (if incrementing) or our
10040 * final SP (if decrementing), so that's what we check.
10041 */
10042 gen_helper_v8m_stackcheck(cpu_env, addr);
10043 }
10044
10045 return addr;
10046 }
10047
10048 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10049 TCGv_i32 addr, int n)
10050 {
10051 if (a->w) {
10052 /* write back */
10053 if (!a->b) {
10054 if (a->i) {
10055 /* post increment */
10056 tcg_gen_addi_i32(addr, addr, 4);
10057 } else {
10058 /* post decrement */
10059 tcg_gen_addi_i32(addr, addr, -(n * 4));
10060 }
10061 } else if (!a->i && n != 1) {
10062 /* pre decrement */
10063 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10064 }
10065 store_reg(s, a->rn, addr);
10066 } else {
10067 tcg_temp_free_i32(addr);
10068 }
10069 }
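
/*
 * Illustrative sketch, not used by the translator: the lowest address
 * accessed and the written-back base produced by op_addr_block_pre()
 * and op_addr_block_post() above for an n-register LDM/STM, where
 * 'inc' is a->i and 'before' is a->b.  The struct and helper names
 * are illustrative assumptions.
 */
typedef struct {
    uint32_t lowest;     /* address of the first (lowest) transfer */
    uint32_t writeback;  /* final base value when writeback is enabled */
} BlockAddrSketch;

static inline BlockAddrSketch block_addr_ref(uint32_t base, bool inc,
                                             bool before, int n)
{
    BlockAddrSketch r;

    if (inc) {
        r.lowest = before ? base + 4 : base;        /* IB : IA */
        r.writeback = base + n * 4;
    } else {
        r.lowest = before ? base - n * 4            /* DB */
                          : base - (n - 1) * 4;     /* DA */
        r.writeback = base - n * 4;
    }
    return r;
}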
10070
10071 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10072 {
10073 int i, j, n, list, mem_idx;
10074 bool user = a->u;
10075 TCGv_i32 addr, tmp, tmp2;
10076
10077 if (user) {
10078 /* STM (user) */
10079 if (IS_USER(s)) {
10080 /* Only usable in supervisor mode. */
10081 unallocated_encoding(s);
10082 return true;
10083 }
10084 }
10085
10086 list = a->list;
10087 n = ctpop16(list);
10088 if (n < min_n || a->rn == 15) {
10089 unallocated_encoding(s);
10090 return true;
10091 }
10092
10093 addr = op_addr_block_pre(s, a, n);
10094 mem_idx = get_mem_index(s);
10095
10096 for (i = j = 0; i < 16; i++) {
10097 if (!(list & (1 << i))) {
10098 continue;
10099 }
10100
10101 if (user && i != 15) {
10102 tmp = tcg_temp_new_i32();
10103 tmp2 = tcg_const_i32(i);
10104 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10105 tcg_temp_free_i32(tmp2);
10106 } else {
10107 tmp = load_reg(s, i);
10108 }
10109 gen_aa32_st32(s, tmp, addr, mem_idx);
10110 tcg_temp_free_i32(tmp);
10111
10112 /* No need to add after the last transfer. */
10113 if (++j != n) {
10114 tcg_gen_addi_i32(addr, addr, 4);
10115 }
10116 }
10117
10118 op_addr_block_post(s, a, addr, n);
10119 return true;
10120 }
10121
10122 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10123 {
10124 /* BitCount(list) < 1 is UNPREDICTABLE */
10125 return op_stm(s, a, 1);
10126 }
10127
10128 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10129 {
10130 /* Writeback register in register list is UNPREDICTABLE for T32. */
10131 if (a->w && (a->list & (1 << a->rn))) {
10132 unallocated_encoding(s);
10133 return true;
10134 }
10135 /* BitCount(list) < 2 is UNPREDICTABLE */
10136 return op_stm(s, a, 2);
10137 }
10138
10139 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10140 {
10141 int i, j, n, list, mem_idx;
10142 bool loaded_base;
10143 bool user = a->u;
10144 bool exc_return = false;
10145 TCGv_i32 addr, tmp, tmp2, loaded_var;
10146
10147 if (user) {
10148 /* LDM (user), LDM (exception return) */
10149 if (IS_USER(s)) {
10150 /* Only usable in supervisor mode. */
10151 unallocated_encoding(s);
10152 return true;
10153 }
10154 if (extract32(a->list, 15, 1)) {
10155 exc_return = true;
10156 user = false;
10157 } else {
10158 /* LDM (user) does not allow writeback. */
10159 if (a->w) {
10160 unallocated_encoding(s);
10161 return true;
10162 }
10163 }
10164 }
10165
10166 list = a->list;
10167 n = ctpop16(list);
10168 if (n < min_n || a->rn == 15) {
10169 unallocated_encoding(s);
10170 return true;
10171 }
10172
10173 addr = op_addr_block_pre(s, a, n);
10174 mem_idx = get_mem_index(s);
10175 loaded_base = false;
10176 loaded_var = NULL;
10177
10178 for (i = j = 0; i < 16; i++) {
10179 if (!(list & (1 << i))) {
10180 continue;
10181 }
10182
10183 tmp = tcg_temp_new_i32();
10184 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10185 if (user) {
10186 tmp2 = tcg_const_i32(i);
10187 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10188 tcg_temp_free_i32(tmp2);
10189 tcg_temp_free_i32(tmp);
10190 } else if (i == a->rn) {
10191 loaded_var = tmp;
10192 loaded_base = true;
10193 } else if (i == 15 && exc_return) {
10194 store_pc_exc_ret(s, tmp);
10195 } else {
10196 store_reg_from_load(s, i, tmp);
10197 }
10198
10199 /* No need to add after the last transfer. */
10200 if (++j != n) {
10201 tcg_gen_addi_i32(addr, addr, 4);
10202 }
10203 }
10204
10205 op_addr_block_post(s, a, addr, n);
10206
10207 if (loaded_base) {
10208 /* Note that we reject base == pc above. */
10209 store_reg(s, a->rn, loaded_var);
10210 }
10211
10212 if (exc_return) {
10213 /* Restore CPSR from SPSR. */
10214 tmp = load_cpu_field(spsr);
10215 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10216 gen_io_start();
10217 }
10218 gen_helper_cpsr_write_eret(cpu_env, tmp);
10219 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10220 gen_io_end();
10221 }
10222 tcg_temp_free_i32(tmp);
10223 /* Must exit loop to check un-masked IRQs */
10224 s->base.is_jmp = DISAS_EXIT;
10225 }
10226 return true;
10227 }
10228
10229 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10230 {
10231 /*
10232 * Writeback register in register list is UNPREDICTABLE
10233 * for ArchVersion() >= 7. Prior to v7, A32 would write
10234 * an UNKNOWN value to the base register.
10235 */
10236 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10237 unallocated_encoding(s);
10238 return true;
10239 }
10240 /* BitCount(list) < 1 is UNPREDICTABLE */
10241 return do_ldm(s, a, 1);
10242 }
10243
10244 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10245 {
10246 /* Writeback register in register list is UNPREDICTABLE for T32. */
10247 if (a->w && (a->list & (1 << a->rn))) {
10248 unallocated_encoding(s);
10249 return true;
10250 }
10251 /* BitCount(list) < 2 is UNPREDICTABLE */
10252 return do_ldm(s, a, 2);
10253 }
10254
10255 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10256 {
10257 /* Writeback is conditional on the base register not being loaded. */
10258 a->w = !(a->list & (1 << a->rn));
10259 /* BitCount(list) < 1 is UNPREDICTABLE */
10260 return do_ldm(s, a, 1);
10261 }
10262
10263 /*
10264 * Branch, branch with link
10265 */
10266
10267 static bool trans_B(DisasContext *s, arg_i *a)
10268 {
10269 gen_jmp(s, read_pc(s) + a->imm);
10270 return true;
10271 }
10272
10273 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10274 {
10275 /* This has cond from encoding, required to be outside IT block. */
10276 if (a->cond >= 0xe) {
10277 return false;
10278 }
10279 if (s->condexec_mask) {
10280 unallocated_encoding(s);
10281 return true;
10282 }
10283 arm_skip_unless(s, a->cond);
10284 gen_jmp(s, read_pc(s) + a->imm);
10285 return true;
10286 }
10287
10288 static bool trans_BL(DisasContext *s, arg_i *a)
10289 {
10290 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10291 gen_jmp(s, read_pc(s) + a->imm);
10292 return true;
10293 }
10294
10295 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10296 {
10297 TCGv_i32 tmp;
10298
10299 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10300 if (s->thumb && (a->imm & 2)) {
10301 return false;
10302 }
10303 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10304 tmp = tcg_const_i32(!s->thumb);
10305 store_cpu_field(tmp, thumb);
10306 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10307 return true;
10308 }
10309
10310 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10311 {
10312 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10313 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10314 return true;
10315 }
10316
10317 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10318 {
10319 TCGv_i32 tmp = tcg_temp_new_i32();
10320
10321 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10322 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10323 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10324 gen_bx(s, tmp);
10325 return true;
10326 }
10327
10328 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10329 {
10330 TCGv_i32 tmp;
10331
10332 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10333 if (!ENABLE_ARCH_5) {
10334 return false;
10335 }
10336 tmp = tcg_temp_new_i32();
10337 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10338 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10339 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10340 gen_bx(s, tmp);
10341 return true;
10342 }
10343
10344 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10345 {
10346 TCGv_i32 addr, tmp;
10347
10348 tmp = load_reg(s, a->rm);
10349 if (half) {
10350 tcg_gen_add_i32(tmp, tmp, tmp);
10351 }
10352 addr = load_reg(s, a->rn);
10353 tcg_gen_add_i32(addr, addr, tmp);
10354
10355 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10356 half ? MO_UW | s->be_data : MO_UB);
10357 tcg_temp_free_i32(addr);
10358
10359 tcg_gen_add_i32(tmp, tmp, tmp);
10360 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10361 store_reg(s, 15, tmp);
10362 return true;
10363 }
10364
10365 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10366 {
10367 return op_tbranch(s, a, false);
10368 }
10369
10370 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10371 {
10372 return op_tbranch(s, a, true);
10373 }
10374
10375 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10376 {
10377 TCGv_i32 tmp = load_reg(s, a->rn);
10378
10379 arm_gen_condlabel(s);
10380 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10381 tmp, 0, s->condlabel);
10382 tcg_temp_free_i32(tmp);
10383 gen_jmp(s, read_pc(s) + a->imm);
10384 return true;
10385 }
10386
10387 /*
10388 * Supervisor call - both T32 & A32 come here so we need to check
10389 * which mode we are in when checking for semihosting.
10390 */
10391
10392 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10393 {
10394 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10395
10396 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10397 #ifndef CONFIG_USER_ONLY
10398 !IS_USER(s) &&
10399 #endif
10400 (a->imm == semihost_imm)) {
10401 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10402 } else {
10403 gen_set_pc_im(s, s->base.pc_next);
10404 s->svc_imm = a->imm;
10405 s->base.is_jmp = DISAS_SWI;
10406 }
10407 return true;
10408 }
10409
10410 /*
10411 * Unconditional system instructions
10412 */
10413
10414 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10415 {
10416 static const int8_t pre_offset[4] = {
10417 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10418 };
10419 static const int8_t post_offset[4] = {
10420 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10421 };
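/*
 * Worked example of the offset tables above (illustrative, and follows
 * directly from the code below): for RFEIA the PC is loaded from [Rn]
 * and the CPSR from [Rn + 4], with writeback of Rn + 8; for RFEDB the
 * PC comes from [Rn - 8] and the CPSR from [Rn - 4], with writeback
 * of Rn - 8.
 */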
10422 TCGv_i32 addr, t1, t2;
10423
10424 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10425 return false;
10426 }
10427 if (IS_USER(s)) {
10428 unallocated_encoding(s);
10429 return true;
10430 }
10431
10432 addr = load_reg(s, a->rn);
10433 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10434
10435 /* Load PC into tmp and CPSR into tmp2. */
10436 t1 = tcg_temp_new_i32();
10437 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10438 tcg_gen_addi_i32(addr, addr, 4);
10439 t2 = tcg_temp_new_i32();
10440 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10441
10442 if (a->w) {
10443 /* Base writeback. */
10444 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10445 store_reg(s, a->rn, addr);
10446 } else {
10447 tcg_temp_free_i32(addr);
10448 }
10449 gen_rfe(s, t1, t2);
10450 return true;
10451 }
10452
10453 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10454 {
10455 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10456 return false;
10457 }
10458 gen_srs(s, a->mode, a->pu, a->w);
10459 return true;
10460 }
10461
10462 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10463 {
10464 uint32_t mask, val;
10465
10466 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10467 return false;
10468 }
10469 if (IS_USER(s)) {
10470 /* Implemented as NOP in user mode. */
10471 return true;
10472 }
10473 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10474
10475 mask = val = 0;
10476 if (a->imod & 2) {
10477 if (a->A) {
10478 mask |= CPSR_A;
10479 }
10480 if (a->I) {
10481 mask |= CPSR_I;
10482 }
10483 if (a->F) {
10484 mask |= CPSR_F;
10485 }
10486 if (a->imod & 1) {
10487 val |= mask;
10488 }
10489 }
10490 if (a->M) {
10491 mask |= CPSR_M;
10492 val |= a->mode;
10493 }
10494 if (mask) {
10495 gen_set_psr_im(s, mask, 0, val);
10496 }
10497 return true;
10498 }
10499
10500 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10501 {
10502 TCGv_i32 tmp, addr, el;
10503
10504 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10505 return false;
10506 }
10507 if (IS_USER(s)) {
10508 /* Implemented as NOP in user mode. */
10509 return true;
10510 }
10511
10512 tmp = tcg_const_i32(a->im);
10513 /* FAULTMASK */
10514 if (a->F) {
10515 addr = tcg_const_i32(19);
10516 gen_helper_v7m_msr(cpu_env, addr, tmp);
10517 tcg_temp_free_i32(addr);
10518 }
10519 /* PRIMASK */
10520 if (a->I) {
10521 addr = tcg_const_i32(16);
10522 gen_helper_v7m_msr(cpu_env, addr, tmp);
10523 tcg_temp_free_i32(addr);
10524 }
10525 el = tcg_const_i32(s->current_el);
10526 gen_helper_rebuild_hflags_m32(cpu_env, el);
10527 tcg_temp_free_i32(el);
10528 tcg_temp_free_i32(tmp);
10529 gen_lookup_tb(s);
10530 return true;
10531 }
10532
10533 /*
10534 * Clear-Exclusive, Barriers
10535 */
10536
10537 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10538 {
10539 if (s->thumb
10540 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10541 : !ENABLE_ARCH_6K) {
10542 return false;
10543 }
10544 gen_clrex(s);
10545 return true;
10546 }
10547
10548 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10549 {
10550 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10551 return false;
10552 }
10553 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10554 return true;
10555 }
10556
10557 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10558 {
10559 return trans_DSB(s, NULL);
10560 }
10561
10562 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10563 {
10564 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10565 return false;
10566 }
10567 /*
10568 * We need to break the TB after this insn to execute
10569 * self-modifying code correctly and also to take
10570 * any pending interrupts immediately.
10571 */
10572 gen_goto_tb(s, 0, s->base.pc_next);
10573 return true;
10574 }
10575
10576 static bool trans_SB(DisasContext *s, arg_SB *a)
10577 {
10578 if (!dc_isar_feature(aa32_sb, s)) {
10579 return false;
10580 }
10581 /*
10582 * TODO: There is no speculation barrier opcode
10583 * for TCG; MB and end the TB instead.
10584 */
10585 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10586 gen_goto_tb(s, 0, s->base.pc_next);
10587 return true;
10588 }
10589
10590 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10591 {
10592 if (!ENABLE_ARCH_6) {
10593 return false;
10594 }
10595 if (a->E != (s->be_data == MO_BE)) {
10596 gen_helper_setend(cpu_env);
10597 s->base.is_jmp = DISAS_UPDATE;
10598 }
10599 return true;
10600 }
10601
10602 /*
10603 * Preload instructions
10604 * All are nops, contingent on the appropriate arch level.
10605 */
10606
10607 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10608 {
10609 return ENABLE_ARCH_5TE;
10610 }
10611
10612 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10613 {
10614 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10615 }
10616
10617 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10618 {
10619 return ENABLE_ARCH_7;
10620 }
10621
10622 /*
10623 * If-then
10624 */
10625
10626 static bool trans_IT(DisasContext *s, arg_IT *a)
10627 {
10628 int cond_mask = a->cond_mask;
10629
10630 /*
10631 * No actual code generated for this insn, just setup state.
10632 *
10633 * Combinations of firstcond and mask which set up a 0b1111
10634 * condition are UNPREDICTABLE; we take the CONSTRAINED
10635 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10636 * i.e. both meaning "execute always".
10637 */
10638 s->condexec_cond = (cond_mask >> 4) & 0xe;
10639 s->condexec_mask = cond_mask & 0x1f;
10640 return true;
10641 }
10642
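/*
 * Illustrative sketch, not used by the translator: how the per-insn
 * ITSTATE advance in thumb_tr_translate_insn() later in this file
 * evolves the (condexec_cond, condexec_mask) pair set up by trans_IT()
 * above.  The helper name is an illustrative assumption.
 */
static inline void it_advance_sketch(uint32_t *cond, uint32_t *mask)
{
    /* Bit 4 of the mask supplies the T/E bit for the next insn. */
    *cond = (*cond & 0xe) | ((*mask >> 4) & 1);
    *mask = (*mask << 1) & 0x1f;
    if (*mask == 0) {
        /* The IT block has finished; revert to "execute always". */
        *cond = 0;
    }
}
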
10643 /*
10644 * Legacy decoder.
10645 */
10646
10647 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10648 {
10649 unsigned int cond = insn >> 28;
10650
10651 /* M variants do not implement ARM mode; this must raise the INVSTATE
10652 * UsageFault exception.
10653 */
10654 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10655 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10656 default_exception_el(s));
10657 return;
10658 }
10659
10660 if (cond == 0xf) {
10661 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10662 * choose to UNDEF. In ARMv5 and above the space is used
10663 * for miscellaneous unconditional instructions.
10664 */
10665 ARCH(5);
10666
10667 /* Unconditional instructions. */
10668 /* TODO: Perhaps merge these into one decodetree output file. */
10669 if (disas_a32_uncond(s, insn) ||
10670 disas_vfp_uncond(s, insn) ||
10671 disas_neon_dp(s, insn) ||
10672 disas_neon_ls(s, insn) ||
10673 disas_neon_shared(s, insn)) {
10674 return;
10675 }
10676 /* fall back to legacy decoder */
10677
10678 if (((insn >> 25) & 7) == 1) {
10679 /* NEON Data processing. */
10680 if (disas_neon_data_insn(s, insn)) {
10681 goto illegal_op;
10682 }
10683 return;
10684 }
10685 if ((insn & 0x0e000f00) == 0x0c000100) {
10686 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10687 /* iWMMXt register transfer. */
10688 if (extract32(s->c15_cpar, 1, 1)) {
10689 if (!disas_iwmmxt_insn(s, insn)) {
10690 return;
10691 }
10692 }
10693 }
10694 }
10695 goto illegal_op;
10696 }
10697 if (cond != 0xe) {
10698 /* If the condition is not "always", generate a conditional
10699 jump to the next instruction. */
10700 arm_skip_unless(s, cond);
10701 }
10702
10703 /* TODO: Perhaps merge these into one decodetree output file. */
10704 if (disas_a32(s, insn) ||
10705 disas_vfp(s, insn)) {
10706 return;
10707 }
10708 /* fall back to legacy decoder */
10709
10710 switch ((insn >> 24) & 0xf) {
10711 case 0xc:
10712 case 0xd:
10713 case 0xe:
10714 if (((insn >> 8) & 0xe) == 10) {
10715 /* VFP, but failed disas_vfp. */
10716 goto illegal_op;
10717 }
10718 if (disas_coproc_insn(s, insn)) {
10719 /* Coprocessor. */
10720 goto illegal_op;
10721 }
10722 break;
10723 default:
10724 illegal_op:
10725 unallocated_encoding(s);
10726 break;
10727 }
10728 }
10729
10730 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10731 {
10732 /*
10733 * Return true if this is a 16 bit instruction. We must be precise
10734 * about this (matching the decode).
10735 */
10736 if ((insn >> 11) < 0x1d) {
10737 /* Definitely a 16-bit instruction */
10738 return true;
10739 }
10740
10741 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10742 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10743 * end up actually treating this as two 16-bit insns, though,
10744 * if it's half of a bl/blx pair that might span a page boundary.
10745 */
10746 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10747 arm_dc_feature(s, ARM_FEATURE_M)) {
10748 /* Thumb2 cores (including all M profile ones) always treat
10749 * 32-bit insns as 32-bit.
10750 */
10751 return false;
10752 }
10753
10754 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10755 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10756 * is not on the next page; we merge this into a 32-bit
10757 * insn.
10758 */
10759 return false;
10760 }
10761 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10762 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10763 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10764 * -- handle as single 16 bit insn
10765 */
10766 return true;
10767 }
10768
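/*
 * Some illustrative examples of the classification above: 0x4770
 * (BX lr) has top five bits 0b01000 and is always 16-bit; 0xe92d
 * (first half of STMDB sp! / PUSH.W) has 0b11101 and 0xf7ff (first
 * half of a BL with a negative offset) has 0b11110, so both start a
 * 32-bit insn on Thumb-2 cores.
 */
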
10769 /* Translate a 32-bit thumb instruction. */
10770 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10771 {
10772 /*
10773 * ARMv6-M supports a limited subset of Thumb2 instructions.
10774 * Other Thumb1 architectures allow only 32-bit
10775 * combined BL/BLX prefix and suffix.
10776 */
10777 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10778 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10779 int i;
10780 bool found = false;
10781 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10782 0xf3b08040 /* dsb */,
10783 0xf3b08050 /* dmb */,
10784 0xf3b08060 /* isb */,
10785 0xf3e08000 /* mrs */,
10786 0xf000d000 /* bl */};
10787 static const uint32_t armv6m_mask[] = {0xffe0d000,
10788 0xfff0d0f0,
10789 0xfff0d0f0,
10790 0xfff0d0f0,
10791 0xffe0d000,
10792 0xf800d000};
10793
10794 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10795 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10796 found = true;
10797 break;
10798 }
10799 }
10800 if (!found) {
10801 goto illegal_op;
10802 }
10803 } else if ((insn & 0xf800e800) != 0xf000e800) {
10804 ARCH(6T2);
10805 }
10806
10807 if ((insn & 0xef000000) == 0xef000000) {
10808 /*
10809 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10810 * transform into
10811 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10812 */
10813 uint32_t a32_insn = (insn & 0xe2ffffff) |
10814 ((insn & (1 << 28)) >> 4) | (1 << 28);
10815
10816 if (disas_neon_dp(s, a32_insn)) {
10817 return;
10818 }
10819 }
10820
10821 if ((insn & 0xff100000) == 0xf9000000) {
10822 /*
10823 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10824 * transform into
10825 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10826 */
10827 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10828
10829 if (disas_neon_ls(s, a32_insn)) {
10830 return;
10831 }
10832 }
10833
10834 /*
10835 * TODO: Perhaps merge these into one decodetree output file.
10836 * Note disas_vfp is written for a32 with cond field in the
10837 * top nibble. The t32 encoding requires 0xe in the top nibble.
10838 */
10839 if (disas_t32(s, insn) ||
10840 disas_vfp_uncond(s, insn) ||
10841 disas_neon_shared(s, insn) ||
10842 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10843 return;
10844 }
10845 /* fall back to legacy decoder */
10846
10847 switch ((insn >> 25) & 0xf) {
10848 case 0: case 1: case 2: case 3:
10849 /* 16-bit instructions. Should never happen. */
10850 abort();
10851 case 6: case 7: case 14: case 15:
10852 /* Coprocessor. */
10853 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10854 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10855 if (extract32(insn, 24, 2) == 3) {
10856 goto illegal_op; /* op0 = 0b11 : unallocated */
10857 }
10858
10859 if (((insn >> 8) & 0xe) == 10 &&
10860 dc_isar_feature(aa32_fpsp_v2, s)) {
10861 /* FP, and the CPU supports it */
10862 goto illegal_op;
10863 } else {
10864 /* All other insns: NOCP */
10865 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10866 syn_uncategorized(),
10867 default_exception_el(s));
10868 }
10869 break;
10870 }
10871 if (((insn >> 24) & 3) == 3) {
10872 /* Translate into the equivalent ARM encoding. */
10873 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10874 if (disas_neon_data_insn(s, insn)) {
10875 goto illegal_op;
10876 }
10877 } else if (((insn >> 8) & 0xe) == 10) {
10878 /* VFP, but failed disas_vfp. */
10879 goto illegal_op;
10880 } else {
10881 if (insn & (1 << 28))
10882 goto illegal_op;
10883 if (disas_coproc_insn(s, insn)) {
10884 goto illegal_op;
10885 }
10886 }
10887 break;
10888 case 12:
10889 goto illegal_op;
10890 default:
10891 illegal_op:
10892 unallocated_encoding(s);
10893 }
10894 }
10895
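/*
 * Illustrative sketch, not used by the translator: the T32 -> A32 bit
 * rewrite applied to the Neon data-processing space in
 * disas_thumb2_insn() above.  It moves the 'p' bit from bit 28 down
 * to bit 24 and forces bits [31:25] to the fixed 0b1111001 pattern of
 * the A32 encoding.  The helper name is an illustrative assumption.
 */
static inline uint32_t t32_neon_dp_to_a32_sketch(uint32_t insn)
{
    return (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
}
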
10896 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10897 {
10898 if (!disas_t16(s, insn)) {
10899 unallocated_encoding(s);
10900 }
10901 }
10902
10903 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10904 {
10905 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10906 * (False positives are OK, false negatives are not.)
10907 * We know this is a Thumb insn, and our caller ensures we are
10908 * only called if dc->base.pc_next is less than 4 bytes from the page
10909 * boundary, so we cross the page if the first 16 bits indicate
10910 * that this is a 32 bit insn.
10911 */
10912 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10913
10914 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10915 }
10916
10917 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10918 {
10919 DisasContext *dc = container_of(dcbase, DisasContext, base);
10920 CPUARMState *env = cs->env_ptr;
10921 ARMCPU *cpu = env_archcpu(env);
10922 uint32_t tb_flags = dc->base.tb->flags;
10923 uint32_t condexec, core_mmu_idx;
10924
10925 dc->isar = &cpu->isar;
10926 dc->condjmp = 0;
10927
10928 dc->aarch64 = 0;
10929 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10930 * there is no secure EL1, so we route exceptions to EL3.
10931 */
10932 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10933 !arm_el_is_aa64(env, 3);
10934 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10935 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10936 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10937 dc->condexec_mask = (condexec & 0xf) << 1;
10938 dc->condexec_cond = condexec >> 4;
10939
10940 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10941 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10942 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10943 #if !defined(CONFIG_USER_ONLY)
10944 dc->user = (dc->current_el == 0);
10945 #endif
10946 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10947
10948 if (arm_feature(env, ARM_FEATURE_M)) {
10949 dc->vfp_enabled = 1;
10950 dc->be_data = MO_TE;
10951 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10952 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10953 regime_is_secure(env, dc->mmu_idx);
10954 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10955 dc->v8m_fpccr_s_wrong =
10956 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10957 dc->v7m_new_fp_ctxt_needed =
10958 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10959 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10960 } else {
10961 dc->be_data =
10962 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10963 dc->debug_target_el =
10964 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10965 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10966 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10967 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10968 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10969 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10970 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10971 } else {
10972 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10973 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10974 }
10975 }
10976 dc->cp_regs = cpu->cp_regs;
10977 dc->features = env->features;
10978
10979 /* Single step state. The code-generation logic here is:
10980 * SS_ACTIVE == 0:
10981 * generate code with no special handling for single-stepping (except
10982 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10983 * this happens anyway because those changes are all system register or
10984 * PSTATE writes).
10985 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10986 * emit code for one insn
10987 * emit code to clear PSTATE.SS
10988 * emit code to generate software step exception for completed step
10989 * end TB (as usual for having generated an exception)
10990 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10991 * emit code to generate a software step exception
10992 * end the TB
10993 */
10994 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10995 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10996 dc->is_ldex = false;
10997
10998 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10999
11000 /* If architectural single step active, limit to 1. */
11001 if (is_singlestepping(dc)) {
11002 dc->base.max_insns = 1;
11003 }
11004
11005 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11006 to those left on the page. */
11007 if (!dc->thumb) {
11008 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11009 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11010 }
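
/*
 * Worked example of the bound above (illustrative, assuming a 4KiB
 * target page): TARGET_PAGE_MASK is then 0xfffff000, so for
 * pc_first = 0x80000f80 the expression -(pc_first | TARGET_PAGE_MASK)
 * is 0x80, giving a bound of 32 four-byte insns, exactly the number
 * left before the page boundary.
 */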
11011
11012 cpu_V0 = tcg_temp_new_i64();
11013 cpu_V1 = tcg_temp_new_i64();
11014 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11015 cpu_M0 = tcg_temp_new_i64();
11016 }
11017
11018 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11019 {
11020 DisasContext *dc = container_of(dcbase, DisasContext, base);
11021
11022 /* A note on handling of the condexec (IT) bits:
11023 *
11024 * We want to avoid the overhead of having to write the updated condexec
11025 * bits back to the CPUARMState for every instruction in an IT block. So:
11026 * (1) if the condexec bits are not already zero then we write
11027 * zero back into the CPUARMState now. This avoids complications trying
11028 * to do it at the end of the block. (For example if we don't do this
11029 * it's hard to identify whether we can safely skip writing condexec
11030 * at the end of the TB, which we definitely want to do for the case
11031 * where a TB doesn't do anything with the IT state at all.)
11032 * (2) if we are going to leave the TB then we call gen_set_condexec()
11033 * which will write the correct value into CPUARMState if zero is wrong.
11034 * This is done both for leaving the TB at the end, and for leaving
11035 * it because of an exception we know will happen, which is done in
11036 * gen_exception_insn(). The latter is necessary because we need to
11037 * leave the TB with the PC/IT state just prior to execution of the
11038 * instruction which caused the exception.
11039 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11040 * then the CPUARMState will be wrong and we need to reset it.
11041 * This is handled in the same way as restoration of the
11042 * PC in these situations; we save the value of the condexec bits
11043 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11044 * then uses this to restore them after an exception.
11045 *
11046 * Note that there are no instructions which can read the condexec
11047 * bits, and none which can write non-static values to them, so
11048 * we don't need to care about whether CPUARMState is correct in the
11049 * middle of a TB.
11050 */
11051
11052 /* Reset the conditional execution bits immediately. This avoids
11053 complications trying to do it at the end of the block. */
11054 if (dc->condexec_mask || dc->condexec_cond) {
11055 TCGv_i32 tmp = tcg_temp_new_i32();
11056 tcg_gen_movi_i32(tmp, 0);
11057 store_cpu_field(tmp, condexec_bits);
11058 }
11059 }
11060
11061 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11062 {
11063 DisasContext *dc = container_of(dcbase, DisasContext, base);
11064
11065 tcg_gen_insn_start(dc->base.pc_next,
11066 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11067 0);
11068 dc->insn_start = tcg_last_op();
11069 }
11070
11071 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11072 const CPUBreakpoint *bp)
11073 {
11074 DisasContext *dc = container_of(dcbase, DisasContext, base);
11075
11076 if (bp->flags & BP_CPU) {
11077 gen_set_condexec(dc);
11078 gen_set_pc_im(dc, dc->base.pc_next);
11079 gen_helper_check_breakpoints(cpu_env);
11080 /* End the TB early; it's likely not going to be executed */
11081 dc->base.is_jmp = DISAS_TOO_MANY;
11082 } else {
11083 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11084 /* The address covered by the breakpoint must be
11085 included in [tb->pc, tb->pc + tb->size) in order
11086 for it to be properly cleared -- thus we
11087 increment the PC here so that the logic setting
11088 tb->size below does the right thing. */
11089 /* TODO: Advance PC by correct instruction length to
11090 * avoid disassembler error messages */
11091 dc->base.pc_next += 2;
11092 dc->base.is_jmp = DISAS_NORETURN;
11093 }
11094
11095 return true;
11096 }
11097
11098 static bool arm_pre_translate_insn(DisasContext *dc)
11099 {
11100 #ifdef CONFIG_USER_ONLY
11101 /* Intercept jump to the magic kernel page. */
11102 if (dc->base.pc_next >= 0xffff0000) {
11103 /* We always get here via a jump, so we know we are not in a
11104 conditional execution block. */
11105 gen_exception_internal(EXCP_KERNEL_TRAP);
11106 dc->base.is_jmp = DISAS_NORETURN;
11107 return true;
11108 }
11109 #endif
11110
11111 if (dc->ss_active && !dc->pstate_ss) {
11112 /* Singlestep state is Active-pending.
11113 * If we're in this state at the start of a TB then either
11114 * a) we just took an exception to an EL which is being debugged
11115 * and this is the first insn in the exception handler
11116 * b) debug exceptions were masked and we just unmasked them
11117 * without changing EL (eg by clearing PSTATE.D)
11118 * In either case we're going to take a swstep exception in the
11119 * "did not step an insn" case, and so the syndrome ISV and EX
11120 * bits should be zero.
11121 */
11122 assert(dc->base.num_insns == 1);
11123 gen_swstep_exception(dc, 0, 0);
11124 dc->base.is_jmp = DISAS_NORETURN;
11125 return true;
11126 }
11127
11128 return false;
11129 }
11130
11131 static void arm_post_translate_insn(DisasContext *dc)
11132 {
11133 if (dc->condjmp && !dc->base.is_jmp) {
11134 gen_set_label(dc->condlabel);
11135 dc->condjmp = 0;
11136 }
11137 translator_loop_temp_check(&dc->base);
11138 }
11139
11140 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11141 {
11142 DisasContext *dc = container_of(dcbase, DisasContext, base);
11143 CPUARMState *env = cpu->env_ptr;
11144 unsigned int insn;
11145
11146 if (arm_pre_translate_insn(dc)) {
11147 return;
11148 }
11149
11150 dc->pc_curr = dc->base.pc_next;
11151 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11152 dc->insn = insn;
11153 dc->base.pc_next += 4;
11154 disas_arm_insn(dc, insn);
11155
11156 arm_post_translate_insn(dc);
11157
11158 /* ARM is a fixed-length ISA. We performed the cross-page check
11159 in init_disas_context by adjusting max_insns. */
11160 }
11161
11162 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11163 {
11164 /* Return true if this Thumb insn is always unconditional,
11165 * even inside an IT block. This is true of only a very few
11166 * instructions: BKPT, HLT, and SG.
11167 *
11168 * A larger class of instructions are UNPREDICTABLE if used
11169 * inside an IT block; we do not need to detect those here, because
11170 * what we do by default (perform the cc check and update the IT
11171 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11172 * choice for those situations.
11173 *
11174 * insn is either a 16-bit or a 32-bit instruction; the two are
11175 * distinguishable because for the 16-bit case the top 16 bits
11176 * are zeroes, and that isn't a valid 32-bit encoding.
11177 */
11178 if ((insn & 0xffffff00) == 0xbe00) {
11179 /* BKPT */
11180 return true;
11181 }
11182
11183 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11184 !arm_dc_feature(s, ARM_FEATURE_M)) {
11185 /* HLT: v8A only. This is unconditional even when it is going to
11186 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11187 * For v7 cores this was a plain old undefined encoding and so
11188 * honours its cc check. (We might be using the encoding as
11189 * a semihosting trap, but we don't change the cc check behaviour
11190 * on that account, because a debugger connected to a real v7A
11191 * core and emulating semihosting traps by catching the UNDEF
11192 * exception would also only see cases where the cc check passed.
11193 * No guest code should be trying to do a HLT semihosting trap
11194 * in an IT block anyway.
11195 */
11196 return true;
11197 }
11198
11199 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11200 arm_dc_feature(s, ARM_FEATURE_M)) {
11201 /* SG: v8M only */
11202 return true;
11203 }
11204
11205 return false;
11206 }
11207
11208 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11209 {
11210 DisasContext *dc = container_of(dcbase, DisasContext, base);
11211 CPUARMState *env = cpu->env_ptr;
11212 uint32_t insn;
11213 bool is_16bit;
11214
11215 if (arm_pre_translate_insn(dc)) {
11216 return;
11217 }
11218
11219 dc->pc_curr = dc->base.pc_next;
11220 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11221 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11222 dc->base.pc_next += 2;
11223 if (!is_16bit) {
11224 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11225
11226 insn = insn << 16 | insn2;
11227 dc->base.pc_next += 2;
11228 }
11229 dc->insn = insn;
11230
11231 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11232 uint32_t cond = dc->condexec_cond;
11233
11234 /*
11235 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11236 * "always"; 0xf is not "never".
11237 */
11238 if (cond < 0x0e) {
11239 arm_skip_unless(dc, cond);
11240 }
11241 }
11242
11243 if (is_16bit) {
11244 disas_thumb_insn(dc, insn);
11245 } else {
11246 disas_thumb2_insn(dc, insn);
11247 }
11248
11249 /* Advance the Thumb condexec condition. */
11250 if (dc->condexec_mask) {
11251 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11252 ((dc->condexec_mask >> 4) & 1));
11253 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11254 if (dc->condexec_mask == 0) {
11255 dc->condexec_cond = 0;
11256 }
11257 }
11258
11259 arm_post_translate_insn(dc);
11260
11261 /* Thumb is a variable-length ISA. Stop translation when the next insn
11262 * will touch a new page. This ensures that prefetch aborts occur at
11263 * the right place.
11264 *
11265 * We want to stop the TB if the next insn starts in a new page,
11266 * or if it spans between this page and the next. This means that
11267 * if we're looking at the last halfword in the page we need to
11268 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11269 * or a 32-bit Thumb insn (which won't).
11270 * This is to avoid generating a silly TB with a single 16-bit insn
11271 * in it at the end of this page (which would execute correctly
11272 * but isn't very efficient).
11273 */
11274 if (dc->base.is_jmp == DISAS_NEXT
11275 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11276 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11277 && insn_crosses_page(env, dc)))) {
11278 dc->base.is_jmp = DISAS_TOO_MANY;
11279 }
11280 }
11281
11282 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11283 {
11284 DisasContext *dc = container_of(dcbase, DisasContext, base);
11285
11286 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11287 /* FIXME: This can theoretically happen with self-modifying code. */
11288 cpu_abort(cpu, "IO on conditional branch instruction");
11289 }
11290
11291 /* At this stage dc->condjmp will only be set when the skipped
11292 instruction was a conditional branch or trap, and the PC has
11293 already been written. */
11294 gen_set_condexec(dc);
11295 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11296 /* Exception return branches need some special case code at the
11297 * end of the TB, which is complex enough that it has to
11298 * handle the single-step vs not and the condition-failed
11299 * insn codepath itself.
11300 */
11301 gen_bx_excret_final_code(dc);
11302 } else if (unlikely(is_singlestepping(dc))) {
11303 /* Unconditional and "condition passed" instruction codepath. */
11304 switch (dc->base.is_jmp) {
11305 case DISAS_SWI:
11306 gen_ss_advance(dc);
11307 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11308 default_exception_el(dc));
11309 break;
11310 case DISAS_HVC:
11311 gen_ss_advance(dc);
11312 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11313 break;
11314 case DISAS_SMC:
11315 gen_ss_advance(dc);
11316 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11317 break;
11318 case DISAS_NEXT:
11319 case DISAS_TOO_MANY:
11320 case DISAS_UPDATE:
11321 gen_set_pc_im(dc, dc->base.pc_next);
11322 /* fall through */
11323 default:
11324 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11325 gen_singlestep_exception(dc);
11326 break;
11327 case DISAS_NORETURN:
11328 break;
11329 }
11330 } else {
11331 /* While branches must always occur at the end of an IT block,
11332 there are a few other things that can cause us to terminate
11333 the TB in the middle of an IT block:
11334 - Exception generating instructions (bkpt, swi, undefined).
11335 - Page boundaries.
11336 - Hardware watchpoints.
11337 Hardware breakpoints have already been handled and skip this code.
11338 */
11339 switch(dc->base.is_jmp) {
11340 case DISAS_NEXT:
11341 case DISAS_TOO_MANY:
11342 gen_goto_tb(dc, 1, dc->base.pc_next);
11343 break;
11344 case DISAS_JUMP:
11345 gen_goto_ptr();
11346 break;
11347 case DISAS_UPDATE:
11348 gen_set_pc_im(dc, dc->base.pc_next);
11349 /* fall through */
11350 default:
11351 /* indicate that the hash table must be used to find the next TB */
11352 tcg_gen_exit_tb(NULL, 0);
11353 break;
11354 case DISAS_NORETURN:
11355 /* nothing more to generate */
11356 break;
11357 case DISAS_WFI:
11358 {
11359 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11360 !(dc->insn & (1U << 31))) ? 2 : 4);
11361
11362 gen_helper_wfi(cpu_env, tmp);
11363 tcg_temp_free_i32(tmp);
11364 /* The helper doesn't necessarily throw an exception, but we
11365 * must go back to the main loop to check for interrupts anyway.
11366 */
11367 tcg_gen_exit_tb(NULL, 0);
11368 break;
11369 }
11370 case DISAS_WFE:
11371 gen_helper_wfe(cpu_env);
11372 break;
11373 case DISAS_YIELD:
11374 gen_helper_yield(cpu_env);
11375 break;
11376 case DISAS_SWI:
11377 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11378 default_exception_el(dc));
11379 break;
11380 case DISAS_HVC:
11381 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11382 break;
11383 case DISAS_SMC:
11384 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11385 break;
11386 }
11387 }
11388
11389 if (dc->condjmp) {
11390 /* "Condition failed" instruction codepath for the branch/trap insn */
11391 gen_set_label(dc->condlabel);
11392 gen_set_condexec(dc);
11393 if (unlikely(is_singlestepping(dc))) {
11394 gen_set_pc_im(dc, dc->base.pc_next);
11395 gen_singlestep_exception(dc);
11396 } else {
11397 gen_goto_tb(dc, 1, dc->base.pc_next);
11398 }
11399 }
11400 }
11401
11402 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11403 {
11404 DisasContext *dc = container_of(dcbase, DisasContext, base);
11405
11406 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11407 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11408 }
11409
11410 static const TranslatorOps arm_translator_ops = {
11411 .init_disas_context = arm_tr_init_disas_context,
11412 .tb_start = arm_tr_tb_start,
11413 .insn_start = arm_tr_insn_start,
11414 .breakpoint_check = arm_tr_breakpoint_check,
11415 .translate_insn = arm_tr_translate_insn,
11416 .tb_stop = arm_tr_tb_stop,
11417 .disas_log = arm_tr_disas_log,
11418 };
11419
11420 static const TranslatorOps thumb_translator_ops = {
11421 .init_disas_context = arm_tr_init_disas_context,
11422 .tb_start = arm_tr_tb_start,
11423 .insn_start = arm_tr_insn_start,
11424 .breakpoint_check = arm_tr_breakpoint_check,
11425 .translate_insn = thumb_tr_translate_insn,
11426 .tb_stop = arm_tr_tb_stop,
11427 .disas_log = arm_tr_disas_log,
11428 };
11429
11430 /* generate intermediate code for basic block 'tb'. */
11431 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11432 {
11433 DisasContext dc = { };
11434 const TranslatorOps *ops = &arm_translator_ops;
11435
11436 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11437 ops = &thumb_translator_ops;
11438 }
11439 #ifdef TARGET_AARCH64
11440 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11441 ops = &aarch64_translator_ops;
11442 }
11443 #endif
11444
11445 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11446 }
11447
11448 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11449 target_ulong *data)
11450 {
11451 if (is_a64(env)) {
11452 env->pc = data[0];
11453 env->condexec_bits = 0;
11454 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11455 } else {
11456 env->regs[15] = data[0];
11457 env->condexec_bits = data[1];
11458 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11459 }
11460 }