/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
4a136e0a 32};
8d8fdbae 33#endif /* CONFIG_DEBUG_TCG */
4a136e0a
CF
34
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

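/*
 * Editor's note: a worked example of is_limm(), not part of the original
 * file.  For val = 0x0ff0 (form 0..01..10..0), val & -val is 0x0010, the
 * addition carries the run of ones away leaving 0x1000, and the final
 * power-of-two test passes.  A value with two runs of ones, such as
 * 0x0ff00ff0, leaves stray bits set after the addition and is rejected.
 */
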
/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

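/*
 * Editor's note: an illustrative trace, not part of the original file.
 * For v32 = 0x12340000, the first iteration (i = 6) masks out the top
 * byte, leaving 0x00340000, which is_shimm32() matches with cmode 0x4,
 * imm8 0x34.  The caller can then emit
 *     MOVI rd, #0x34, LSL #16      ; rd = 0x00340000
 *     ORR  rd, #0x12, LSL #24      ; rd = 0x12340000
 * using the returned i = 6 as the ORR cmode.
 */
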
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP       = 0x0e000400,
    I3605_INS       = 0x4e001c00,
    I3605_UMOV      = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

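/*
 * Editor's note: an illustrative expansion, not part of the original
 * file.  A call such as
 *     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
 * pastes together into
 *     tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0);
 * so naming an opcode under the wrong format (e.g. FMT 3401 with OP ADD)
 * refers to a nonexistent I3401_ADD constant and fails to compile.
 */
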
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift).  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

RH
874static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
875 TCGReg rd, int64_t v64)
14e4c1e2 876{
984fdcee
RH
877 bool q = type == TCG_TYPE_V128;
878 int cmode, imm8, i;
879
880 /* Test all bytes equal first. */
4e186175 881 if (vece == MO_8) {
984fdcee
RH
882 imm8 = (uint8_t)v64;
883 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
884 return;
885 }
886
887 /*
888 * Test all bytes 0x00 or 0xff second. This can match cases that
889 * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
890 */
891 for (i = imm8 = 0; i < 8; i++) {
892 uint8_t byte = v64 >> (i * 8);
893 if (byte == 0xff) {
894 imm8 |= 1 << i;
895 } else if (byte != 0) {
896 goto fail_bytes;
897 }
898 }
899 tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
900 return;
901 fail_bytes:
902
903 /*
904 * Tests for various replications. For each element width, if we
905 * cannot find an expansion there's no point checking a larger
906 * width because we already know by replication it cannot match.
907 */
4e186175 908 if (vece == MO_16) {
984fdcee
RH
909 uint16_t v16 = v64;
910
911 if (is_shimm16(v16, &cmode, &imm8)) {
912 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
913 return;
914 }
7e308e00
RH
915 if (is_shimm16(~v16, &cmode, &imm8)) {
916 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
917 return;
918 }
02f3a5b4
RH
919
920 /*
921 * Otherwise, all remaining constants can be loaded in two insns:
922 * rd = v16 & 0xff, rd |= v16 & 0xff00.
923 */
924 tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
925 tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
926 return;
4e186175 927 } else if (vece == MO_32) {
984fdcee 928 uint32_t v32 = v64;
7e308e00 929 uint32_t n32 = ~v32;
984fdcee
RH
930
931 if (is_shimm32(v32, &cmode, &imm8) ||
932 is_soimm32(v32, &cmode, &imm8) ||
933 is_fimm32(v32, &cmode, &imm8)) {
934 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
935 return;
936 }
7e308e00
RH
937 if (is_shimm32(n32, &cmode, &imm8) ||
938 is_soimm32(n32, &cmode, &imm8)) {
939 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940 return;
941 }
02f3a5b4
RH
942
943 /*
944 * Restrict the set of constants to those we can load with
945 * two instructions. Others we load from the pool.
946 */
947 i = is_shimm32_pair(v32, &cmode, &imm8);
948 if (i) {
949 tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950 tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
951 return;
952 }
953 i = is_shimm32_pair(n32, &cmode, &imm8);
954 if (i) {
955 tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
956 tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
957 return;
958 }
984fdcee
RH
959 } else if (is_fimm64(v64, &cmode, &imm8)) {
960 tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
961 return;
962 }
14e4c1e2 963
984fdcee
RH
964 /*
965 * As a last resort, load from the constant pool. Sadly there
966 * is no LD1R (literal), so store the full 16-byte vector.
967 */
968 if (type == TCG_TYPE_V128) {
14e4c1e2
RH
969 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
970 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
971 } else {
972 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
973 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
974 }
975}
976
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff.  */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

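/*
 * Editor's note: a worked example of the MOVZ/MOVK tail above, not part
 * of the original file.  For value = 0x0000123400005678, ctpop64 < 32
 * selects MOVZ; then s0 = 0, t1 = 0x0000123400000000, s1 = 32, t2 = 0,
 * yielding the two-insn sequence
 *     MOVZ rd, #0x5678
 *     MOVK rd, #0x1234, LSL #32
 * Constants that would need a third insn are loaded from the pool.
 */
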
/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

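/*
 * Editor's note: illustrative offsets for an 8-byte access (lgsize = 3),
 * not part of the original file:
 *     offset 0x7ff8  -> scaled uimm12 form (0x7ff8 >> 3 = 0xfff, the max);
 *     offset -8      -> unscaled simm9 form;
 *     offset 0x10000 -> materialized in TCG_REG_TMP, register-offset form.
 */
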
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

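/*
 * Editor's note: not part of the original file.  The shift helpers rely
 * on the standard UBFM/SBFM aliases; e.g. a 32-bit left shift by 8 in
 * tcg_out_shl() computes immr = 32 - 8 = 24 and imms = 31 - 8 = 23,
 * which is precisely the LSL Wd, Wn, #8 alias.
 */
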
static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}

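/*
 * Editor's note: not part of the original file.  The patched pair is
 * either "B target; NOP" when the displacement fits in 26 bits, or
 * "ADRP tmp, page; ADDI tmp, tmp, #lo12" otherwise (the goto_tb code
 * emitted elsewhere in this backend presumably follows with a branch
 * through tmp).  Both variants are exactly 8 bytes, so one aligned
 * 64-bit qatomic_set() replaces them without another thread observing
 * a half-updated pair; the icache flush then publishes the change.
 */
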
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

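/*
 * Editor's note: illustrative only, not part of the original file.
 * A double-word subtract {rl,rh} = {al,ah} - {bl,bh} with register
 * operands comes out as
 *     SUBS rl, al, bl      ; low word, borrow recorded in C
 *     SBC  rh, ah, bh      ; high word consumes the borrow
 * with the final tcg_out_mov() undoing the TCG_REG_TMP substitution
 * made when rl would have clobbered a still-needed input.
 */
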
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}

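/*
 * Editor's note: not part of the original file.  AArch64 has no CTZ
 * insn, so ctz(x) is computed as clz(rbit(x)).  When the replacement
 * value b for a zero input is not the trivial width constant, the
 * result is selected against it:
 *     CMP  a0, #0
 *     CLZ  tmp, a1
 *     CSEL d, tmp, b, ne   ; d = (a0 != 0) ? count : b
 * with CSINV against XZR folding in b = -1 for free.
 */
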
#ifdef CONFIG_SOFTMMU
#include "../tcg-ldst.c.inc"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
    return true;
}

1615
aeee05f5 1616static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
c6d8ed24 1617{
3972ef6f 1618 TCGMemOpIdx oi = lb->oi;
14776ab5
TN
1619 MemOp opc = get_memop(oi);
1620 MemOp size = opc & MO_SIZE;
929f8b55 1621
ffba3eb3 1622 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
aeee05f5
RH
1623 return false;
1624 }
c6d8ed24 1625
3972ef6f 1626 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
b825025f
RH
1627 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1628 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
3972ef6f 1629 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
8587c30c 1630 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
2b7ec66f 1631 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
8587c30c 1632 tcg_out_goto(s, lb->raddr);
aeee05f5 1633 return true;
c6d8ed24
JK
1634}
1635
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = tcg_splitwx_to_rx(raddr);
    label->label_ptr[0] = label_ptr;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);

c6d8ed24
JK
1659/* Load and compare a TLB entry, emitting the conditional jump to the
1660 slow path for the failure case, which will be patched later when finalizing
1661 the slow path. Generated code returns the host addend in X1,
1662 clobbers X0,X2,X3,TMP. */
14776ab5 1663static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
8587c30c
RH
1664 tcg_insn_unit **label_ptr, int mem_index,
1665 bool is_read)
c6d8ed24 1666{
85aa8081
RH
1667 unsigned a_bits = get_alignment_bits(opc);
1668 unsigned s_bits = opc & MO_SIZE;
1669 unsigned a_mask = (1u << a_bits) - 1;
1670 unsigned s_mask = (1u << s_bits) - 1;
65b23204 1671 TCGReg x3;
f7bcd966
RH
1672 TCGType mask_type;
1673 uint64_t compare_mask;
1674
f7bcd966
RH
1675 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1676 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1677
65b23204
RH
1678 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
1679 tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1680 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
f7bcd966
RH
1681
1682 /* Extract the TLB index from the address into X0. */
1683 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1684 TCG_REG_X0, TCG_REG_X0, addr_reg,
1685 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1686
1687 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1688 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1689
1690 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1691 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1692 ? offsetof(CPUTLBEntry, addr_read)
1693 : offsetof(CPUTLBEntry, addr_write));
1694 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1695 offsetof(CPUTLBEntry, addend));
9ee14902
RH
1696
1697 /* For aligned accesses, we check the first byte and include the alignment
1698 bits within the address. For unaligned access, we check that we don't
1699 cross pages using the address of the last byte of the access. */
85aa8081 1700 if (a_bits >= s_bits) {
9ee14902
RH
1701 x3 = addr_reg;
1702 } else {
1703 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
85aa8081 1704 TCG_REG_X3, addr_reg, s_mask - a_mask);
9ee14902
RH
1705 x3 = TCG_REG_X3;
1706 }
f7bcd966 1707 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
6f472467 1708
9ee14902
RH
1709 /* Store the page mask part of the address into X3. */
1710 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
f7bcd966 1711 TCG_REG_X3, x3, compare_mask);
6f472467 1712
c6d8ed24 1713 /* Perform the address comparison. */
f7bcd966 1714 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
6f472467 1715
c6d8ed24 1716 /* If not equal, we jump to the slow path. */
6f472467 1717 *label_ptr = s->code_ptr;
733589b3 1718 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
c6d8ed24
JK
1719}
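/* Schematically, the sequence emitted above is (register roles only;
   exact offsets and extensions depend on MemOp and configuration):

     ldp  x0, x1, [env + TLB_MASK_TABLE_OFS]    mask, table
     and  x0, x0, addr, lsr #(PAGE_BITS - ENTRY_BITS)
     add  x1, x1, x0                            &CPUTLBEntry
     ldr  x0, [x1 + addr_read/addr_write]       tlb comparator
     ldr  x1, [x1 + addend]                     host addend
     add  x3, addr, #(s_mask - a_mask)          only if a_bits < s_bits
     and  x3, x3, #(PAGE_MASK | a_mask)
     cmp  x0, x3
     b.ne slow_path                             patched at finalize time */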
1720
1721#endif /* CONFIG_SOFTMMU */
6a91c7c9 1722
14776ab5 1723static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
ffc63728
PB
1724 TCGReg data_r, TCGReg addr_r,
1725 TCGType otype, TCGReg off_r)
6a91c7c9 1726{
14776ab5 1727 const MemOp bswap = memop & MO_BSWAP;
9e4177ad
RH
1728
1729 switch (memop & MO_SSIZE) {
1730 case MO_UB:
6c0f0c0f 1731 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
6a91c7c9 1732 break;
9e4177ad 1733 case MO_SB:
9c53889b 1734 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
6c0f0c0f 1735 data_r, addr_r, otype, off_r);
6a91c7c9 1736 break;
9e4177ad 1737 case MO_UW:
6c0f0c0f 1738 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
9e4177ad 1739 if (bswap) {
edd8824c 1740 tcg_out_rev16(s, data_r, data_r);
6a91c7c9
JK
1741 }
1742 break;
9e4177ad
RH
1743 case MO_SW:
1744 if (bswap) {
6c0f0c0f 1745 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
edd8824c 1746 tcg_out_rev16(s, data_r, data_r);
9c53889b 1747 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
6a91c7c9 1748 } else {
6c0f0c0f
PB
1749 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1750 data_r, addr_r, otype, off_r);
6a91c7c9
JK
1751 }
1752 break;
9e4177ad 1753 case MO_UL:
6c0f0c0f 1754 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
9e4177ad 1755 if (bswap) {
edd8824c 1756 tcg_out_rev32(s, data_r, data_r);
6a91c7c9
JK
1757 }
1758 break;
9e4177ad
RH
1759 case MO_SL:
1760 if (bswap) {
6c0f0c0f 1761 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
edd8824c 1762 tcg_out_rev32(s, data_r, data_r);
929f8b55 1763 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
6a91c7c9 1764 } else {
6c0f0c0f 1765 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
6a91c7c9
JK
1766 }
1767 break;
9e4177ad 1768 case MO_Q:
6c0f0c0f 1769 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
9e4177ad 1770 if (bswap) {
edd8824c 1771 tcg_out_rev64(s, data_r, data_r);
6a91c7c9
JK
1772 }
1773 break;
1774 default:
1775 tcg_abort();
1776 }
1777}
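/* In the byte-swapped cases above, the load is performed in host order
   and a REV16/REV32/REV64 is appended; the sign-extending variants load
   zero-extended, swap, then sign-extend, as there is no byte-reversing
   sign-extending load. */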
1778
14776ab5 1779static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
ffc63728
PB
1780 TCGReg data_r, TCGReg addr_r,
1781 TCGType otype, TCGReg off_r)
6a91c7c9 1782{
14776ab5 1783 const MemOp bswap = memop & MO_BSWAP;
9e4177ad
RH
1784
1785 switch (memop & MO_SIZE) {
1786 case MO_8:
6c0f0c0f 1787 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
6a91c7c9 1788 break;
9e4177ad 1789 case MO_16:
e81864a1 1790 if (bswap && data_r != TCG_REG_XZR) {
edd8824c 1791 tcg_out_rev16(s, TCG_REG_TMP, data_r);
9e4177ad 1792 data_r = TCG_REG_TMP;
6a91c7c9 1793 }
6c0f0c0f 1794 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
6a91c7c9 1795 break;
9e4177ad 1796 case MO_32:
e81864a1 1797 if (bswap && data_r != TCG_REG_XZR) {
edd8824c 1798 tcg_out_rev32(s, TCG_REG_TMP, data_r);
9e4177ad 1799 data_r = TCG_REG_TMP;
6a91c7c9 1800 }
6c0f0c0f 1801 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
6a91c7c9 1802 break;
9e4177ad 1803 case MO_64:
e81864a1 1804 if (bswap && data_r != TCG_REG_XZR) {
edd8824c 1805 tcg_out_rev64(s, TCG_REG_TMP, data_r);
9e4177ad 1806 data_r = TCG_REG_TMP;
6a91c7c9 1807 }
6c0f0c0f 1808 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
6a91c7c9
JK
1809 break;
1810 default:
1811 tcg_abort();
1812 }
1813}
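/* Byte-swapped stores swap into TCG_REG_TMP first so that the source
   register is preserved; the data_r != TCG_REG_XZR test skips the swap
   when storing zero, whose byte reversal is still zero. */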
4a136e0a 1814
667b1cdd 1815static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
59227d5d 1816 TCGMemOpIdx oi, TCGType ext)
4a136e0a 1817{
14776ab5 1818 MemOp memop = get_memop(oi);
80adb8fc 1819 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
4a136e0a 1820#ifdef CONFIG_SOFTMMU
59227d5d 1821 unsigned mem_index = get_mmuidx(oi);
8587c30c 1822 tcg_insn_unit *label_ptr;
4a136e0a 1823
9ee14902 1824 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
80adb8fc
RH
1825 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1826 TCG_REG_X1, otype, addr_reg);
3972ef6f
RH
1827 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1828 s->code_ptr, label_ptr);
4a136e0a 1829#else /* !CONFIG_SOFTMMU */
352bcb0a
RH
1830 if (USE_GUEST_BASE) {
1831 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1832 TCG_REG_GUEST_BASE, otype, addr_reg);
1833 } else {
1834 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1836 }
6a91c7c9 1837#endif /* CONFIG_SOFTMMU */
4a136e0a
CF
1838}
1839
667b1cdd 1840static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
59227d5d 1841 TCGMemOpIdx oi)
4a136e0a 1842{
14776ab5 1843 MemOp memop = get_memop(oi);
80adb8fc 1844 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
4a136e0a 1845#ifdef CONFIG_SOFTMMU
59227d5d 1846 unsigned mem_index = get_mmuidx(oi);
8587c30c 1847 tcg_insn_unit *label_ptr;
4a136e0a 1848
9ee14902 1849 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
80adb8fc
RH
1850 tcg_out_qemu_st_direct(s, memop, data_reg,
1851 TCG_REG_X1, otype, addr_reg);
9ee14902
RH
1852 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1853 data_reg, addr_reg, s->code_ptr, label_ptr);
4a136e0a 1854#else /* !CONFIG_SOFTMMU */
352bcb0a
RH
1855 if (USE_GUEST_BASE) {
1856 tcg_out_qemu_st_direct(s, memop, data_reg,
1857 TCG_REG_GUEST_BASE, otype, addr_reg);
1858 } else {
1859 tcg_out_qemu_st_direct(s, memop, data_reg,
1860 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1861 }
6a91c7c9 1862#endif /* CONFIG_SOFTMMU */
4a136e0a
CF
1863}
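/* In the softmmu fast paths above, the TLB addend left in X1 by
   tcg_out_tlb_read serves as the base register and the guest address as
   the index; otype selects a 64-bit index or a zero-extended 32-bit one
   to match TARGET_LONG_BITS. */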
1864
ffba3eb3 1865static const tcg_insn_unit *tb_ret_addr;
4a136e0a 1866
4a136e0a 1867static void tcg_out_op(TCGContext *s, TCGOpcode opc,
8d8db193
RH
1868 const TCGArg args[TCG_MAX_OP_ARGS],
1869 const int const_args[TCG_MAX_OP_ARGS])
4a136e0a 1870{
f0293414
RH
1871 /* 99% of the time, we can signal the use of extension registers
1872 by looking to see if the opcode handles 64-bit data. */
1873 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
4a136e0a 1874
8d8db193
RH
1875 /* Hoist the loads of the most common arguments. */
1876 TCGArg a0 = args[0];
1877 TCGArg a1 = args[1];
1878 TCGArg a2 = args[2];
1879 int c2 = const_args[2];
1880
04ce397b
RH
1881 /* Some operands are defined with "rZ" constraint, a register or
1882 the zero register. These need not actually test args[I] == 0. */
1883#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1884
4a136e0a
CF
1885 switch (opc) {
1886 case INDEX_op_exit_tb:
b19f0c2e
RH
1887 /* Reuse the zeroing that exists for goto_ptr. */
1888 if (a0 == 0) {
8b5c2b62 1889 tcg_out_goto_long(s, tcg_code_gen_epilogue);
b19f0c2e
RH
1890 } else {
1891 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
23b7aa1d 1892 tcg_out_goto_long(s, tb_ret_addr);
b19f0c2e 1893 }
4a136e0a
CF
1894 break;
1895
1896 case INDEX_op_goto_tb:
2acee8b2 1897 if (s->tb_jmp_insn_offset != NULL) {
a8583393 1898 /* TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1899 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1900 write can be used to patch the target address. */
1901 if ((uintptr_t)s->code_ptr & 7) {
1902 tcg_out32(s, NOP);
1903 }
1904 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1905 /* actual branch destination will be patched by
a8583393 1906 tb_target_set_jmp_target later. */
2acee8b2
PK
1907 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1908 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1909 } else {
a8583393 1910 /* !TCG_TARGET_HAS_direct_jump */
2acee8b2
PK
1911 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1912 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1913 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
b68686bd 1914 }
b68686bd 1915 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
9f754620 1916 set_jmp_reset_offset(s, a0);
4a136e0a
CF
1917 break;
1918
b19f0c2e
RH
1919 case INDEX_op_goto_ptr:
1920 tcg_out_insn(s, 3207, BR, a0);
1921 break;
1922
4a136e0a 1923 case INDEX_op_br:
bec16311 1924 tcg_out_goto_label(s, arg_label(a0));
4a136e0a
CF
1925 break;
1926
4a136e0a 1927 case INDEX_op_ld8u_i32:
4a136e0a 1928 case INDEX_op_ld8u_i64:
14e4c1e2 1929 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
dc73dfd4
RH
1930 break;
1931 case INDEX_op_ld8s_i32:
14e4c1e2 1932 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
dc73dfd4 1933 break;
4a136e0a 1934 case INDEX_op_ld8s_i64:
14e4c1e2 1935 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
dc73dfd4
RH
1936 break;
1937 case INDEX_op_ld16u_i32:
4a136e0a 1938 case INDEX_op_ld16u_i64:
14e4c1e2 1939 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
dc73dfd4
RH
1940 break;
1941 case INDEX_op_ld16s_i32:
14e4c1e2 1942 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
dc73dfd4 1943 break;
4a136e0a 1944 case INDEX_op_ld16s_i64:
14e4c1e2 1945 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
dc73dfd4
RH
1946 break;
1947 case INDEX_op_ld_i32:
4a136e0a 1948 case INDEX_op_ld32u_i64:
14e4c1e2 1949 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
dc73dfd4 1950 break;
4a136e0a 1951 case INDEX_op_ld32s_i64:
14e4c1e2 1952 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
e81864a1 1953 break;
dc73dfd4 1954 case INDEX_op_ld_i64:
14e4c1e2 1955 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
dc73dfd4
RH
1956 break;
1957
4a136e0a
CF
1958 case INDEX_op_st8_i32:
1959 case INDEX_op_st8_i64:
14e4c1e2 1960 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
dc73dfd4 1961 break;
4a136e0a
CF
1962 case INDEX_op_st16_i32:
1963 case INDEX_op_st16_i64:
14e4c1e2 1964 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
dc73dfd4
RH
1965 break;
1966 case INDEX_op_st_i32:
4a136e0a 1967 case INDEX_op_st32_i64:
14e4c1e2 1968 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
dc73dfd4
RH
1969 break;
1970 case INDEX_op_st_i64:
14e4c1e2 1971 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
4a136e0a
CF
1972 break;
1973
4a136e0a 1974 case INDEX_op_add_i32:
90f1cd91
RH
1975 a2 = (int32_t)a2;
1976 /* FALLTHRU */
1977 case INDEX_op_add_i64:
1978 if (c2) {
1979 tcg_out_addsubi(s, ext, a0, a1, a2);
1980 } else {
1981 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1982 }
4a136e0a
CF
1983 break;
1984
4a136e0a 1985 case INDEX_op_sub_i32:
90f1cd91
RH
1986 a2 = (int32_t)a2;
1987 /* FALLTHRU */
1988 case INDEX_op_sub_i64:
1989 if (c2) {
1990 tcg_out_addsubi(s, ext, a0, a1, -a2);
1991 } else {
1992 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1993 }
4a136e0a
CF
1994 break;
1995
14b155dd
RH
1996 case INDEX_op_neg_i64:
1997 case INDEX_op_neg_i32:
1998 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1999 break;
2000
4a136e0a 2001 case INDEX_op_and_i32:
e029f293
RH
2002 a2 = (int32_t)a2;
2003 /* FALLTHRU */
2004 case INDEX_op_and_i64:
2005 if (c2) {
2006 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2007 } else {
2008 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2009 }
4a136e0a
CF
2010 break;
2011
14b155dd
RH
2012 case INDEX_op_andc_i32:
2013 a2 = (int32_t)a2;
2014 /* FALLTHRU */
2015 case INDEX_op_andc_i64:
2016 if (c2) {
2017 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2018 } else {
2019 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2020 }
2021 break;
2022
4a136e0a 2023 case INDEX_op_or_i32:
e029f293
RH
2024 a2 = (int32_t)a2;
2025 /* FALLTHRU */
2026 case INDEX_op_or_i64:
2027 if (c2) {
2028 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2029 } else {
2030 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2031 }
4a136e0a
CF
2032 break;
2033
14b155dd
RH
2034 case INDEX_op_orc_i32:
2035 a2 = (int32_t)a2;
2036 /* FALLTHRU */
2037 case INDEX_op_orc_i64:
2038 if (c2) {
2039 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2040 } else {
2041 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2042 }
2043 break;
2044
4a136e0a 2045 case INDEX_op_xor_i32:
e029f293
RH
2046 a2 = (int32_t)a2;
2047 /* FALLTHRU */
2048 case INDEX_op_xor_i64:
2049 if (c2) {
2050 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2051 } else {
2052 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2053 }
4a136e0a
CF
2054 break;
2055
14b155dd
RH
2056 case INDEX_op_eqv_i32:
2057 a2 = (int32_t)a2;
2058 /* FALLTHRU */
2059 case INDEX_op_eqv_i64:
2060 if (c2) {
2061 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2062 } else {
2063 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2064 }
2065 break;
2066
2067 case INDEX_op_not_i64:
2068 case INDEX_op_not_i32:
2069 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2070 break;
2071
4a136e0a 2072 case INDEX_op_mul_i64:
4a136e0a 2073 case INDEX_op_mul_i32:
8678b71c
RH
2074 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2075 break;
2076
2077 case INDEX_op_div_i64:
2078 case INDEX_op_div_i32:
2079 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2080 break;
2081 case INDEX_op_divu_i64:
2082 case INDEX_op_divu_i32:
2083 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2084 break;
2085
2086 case INDEX_op_rem_i64:
2087 case INDEX_op_rem_i32:
2088 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2089 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2090 break;
2091 case INDEX_op_remu_i64:
2092 case INDEX_op_remu_i32:
2093 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2094 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
4a136e0a
CF
2095 break;
2096
2097 case INDEX_op_shl_i64:
4a136e0a 2098 case INDEX_op_shl_i32:
df9351e3 2099 if (c2) {
8d8db193 2100 tcg_out_shl(s, ext, a0, a1, a2);
df9351e3
RH
2101 } else {
2102 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
4a136e0a
CF
2103 }
2104 break;
2105
2106 case INDEX_op_shr_i64:
4a136e0a 2107 case INDEX_op_shr_i32:
df9351e3 2108 if (c2) {
8d8db193 2109 tcg_out_shr(s, ext, a0, a1, a2);
df9351e3
RH
2110 } else {
2111 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
4a136e0a
CF
2112 }
2113 break;
2114
2115 case INDEX_op_sar_i64:
4a136e0a 2116 case INDEX_op_sar_i32:
df9351e3 2117 if (c2) {
8d8db193 2118 tcg_out_sar(s, ext, a0, a1, a2);
df9351e3
RH
2119 } else {
2120 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
4a136e0a
CF
2121 }
2122 break;
2123
2124 case INDEX_op_rotr_i64:
4a136e0a 2125 case INDEX_op_rotr_i32:
df9351e3 2126 if (c2) {
8d8db193 2127 tcg_out_rotr(s, ext, a0, a1, a2);
df9351e3
RH
2128 } else {
2129 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
4a136e0a
CF
2130 }
2131 break;
2132
2133 case INDEX_op_rotl_i64:
df9351e3
RH
2134 case INDEX_op_rotl_i32:
2135 if (c2) {
8d8db193 2136 tcg_out_rotl(s, ext, a0, a1, a2);
4a136e0a 2137 } else {
50573c66 2138 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
df9351e3 2139 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
4a136e0a
CF
2140 }
2141 break;
2142
53c76c19
RH
2143 case INDEX_op_clz_i64:
2144 case INDEX_op_clz_i32:
2145 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2146 break;
2147 case INDEX_op_ctz_i64:
2148 case INDEX_op_ctz_i32:
2149 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2150 break;
2151
8d8db193 2152 case INDEX_op_brcond_i32:
90f1cd91
RH
2153 a1 = (int32_t)a1;
2154 /* FALLTHRU */
2155 case INDEX_op_brcond_i64:
bec16311 2156 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
4a136e0a
CF
2157 break;
2158
4a136e0a 2159 case INDEX_op_setcond_i32:
90f1cd91
RH
2160 a2 = (int32_t)a2;
2161 /* FALLTHRU */
2162 case INDEX_op_setcond_i64:
2163 tcg_out_cmp(s, ext, a1, a2, c2);
ed7a0aa8
RH
2164 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2165 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2166 TCG_REG_XZR, tcg_invert_cond(args[3]));
4a136e0a
CF
2167 break;
2168
04ce397b
RH
2169 case INDEX_op_movcond_i32:
2170 a2 = (int32_t)a2;
2171 /* FALLTHRU */
2172 case INDEX_op_movcond_i64:
2173 tcg_out_cmp(s, ext, a1, a2, c2);
2174 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2175 break;
2176
de61d14f
RH
2177 case INDEX_op_qemu_ld_i32:
2178 case INDEX_op_qemu_ld_i64:
59227d5d 2179 tcg_out_qemu_ld(s, a0, a1, a2, ext);
4a136e0a 2180 break;
de61d14f
RH
2181 case INDEX_op_qemu_st_i32:
2182 case INDEX_op_qemu_st_i64:
59227d5d 2183 tcg_out_qemu_st(s, REG0(0), a1, a2);
4a136e0a
CF
2184 break;
2185
f0293414 2186 case INDEX_op_bswap64_i64:
edd8824c
RH
2187 tcg_out_rev64(s, a0, a1);
2188 break;
2189 case INDEX_op_bswap32_i64:
9c4a059d 2190 case INDEX_op_bswap32_i32:
edd8824c 2191 tcg_out_rev32(s, a0, a1);
9c4a059d
CF
2192 break;
2193 case INDEX_op_bswap16_i64:
2194 case INDEX_op_bswap16_i32:
edd8824c 2195 tcg_out_rev16(s, a0, a1);
9c4a059d
CF
2196 break;
2197
31f1275b 2198 case INDEX_op_ext8s_i64:
31f1275b 2199 case INDEX_op_ext8s_i32:
929f8b55 2200 tcg_out_sxt(s, ext, MO_8, a0, a1);
31f1275b
CF
2201 break;
2202 case INDEX_op_ext16s_i64:
31f1275b 2203 case INDEX_op_ext16s_i32:
929f8b55 2204 tcg_out_sxt(s, ext, MO_16, a0, a1);
31f1275b 2205 break;
4f2331e5 2206 case INDEX_op_ext_i32_i64:
31f1275b 2207 case INDEX_op_ext32s_i64:
929f8b55 2208 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
31f1275b
CF
2209 break;
2210 case INDEX_op_ext8u_i64:
2211 case INDEX_op_ext8u_i32:
929f8b55 2212 tcg_out_uxt(s, MO_8, a0, a1);
31f1275b
CF
2213 break;
2214 case INDEX_op_ext16u_i64:
2215 case INDEX_op_ext16u_i32:
929f8b55 2216 tcg_out_uxt(s, MO_16, a0, a1);
31f1275b 2217 break;
4f2331e5 2218 case INDEX_op_extu_i32_i64:
31f1275b 2219 case INDEX_op_ext32u_i64:
929f8b55 2220 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
31f1275b
CF
2221 break;
2222
b3c56df7
RH
2223 case INDEX_op_deposit_i64:
2224 case INDEX_op_deposit_i32:
2225 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2226 break;
2227
e2179f94
RH
2228 case INDEX_op_extract_i64:
2229 case INDEX_op_extract_i32:
2230 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2231 break;
2232
2233 case INDEX_op_sextract_i64:
2234 case INDEX_op_sextract_i32:
2235 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2236 break;
2237
464c2969
RH
2238 case INDEX_op_extract2_i64:
2239 case INDEX_op_extract2_i32:
1789d427 2240 tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
464c2969
RH
2241 break;
2242
c6e929e7
RH
2243 case INDEX_op_add2_i32:
2244 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2245 (int32_t)args[4], args[5], const_args[4],
2246 const_args[5], false);
2247 break;
2248 case INDEX_op_add2_i64:
2249 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2250 args[5], const_args[4], const_args[5], false);
2251 break;
2252 case INDEX_op_sub2_i32:
2253 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2254 (int32_t)args[4], args[5], const_args[4],
2255 const_args[5], true);
2256 break;
2257 case INDEX_op_sub2_i64:
2258 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2259 args[5], const_args[4], const_args[5], true);
2260 break;
2261
1fcc9ddf
RH
2262 case INDEX_op_muluh_i64:
2263 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2264 break;
2265 case INDEX_op_mulsh_i64:
2266 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2267 break;
2268
c7a59c2a
PK
2269 case INDEX_op_mb:
2270 tcg_out_mb(s, a0);
2271 break;
2272
96d0ee7f 2273 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
a51a6b6a 2274 case INDEX_op_mov_i64:
96d0ee7f 2275 case INDEX_op_call: /* Always emitted via tcg_out_call. */
4a136e0a 2276 default:
14e4c1e2 2277 g_assert_not_reached();
4a136e0a 2278 }
04ce397b
RH
2279
2280#undef REG0
4a136e0a
CF
2281}
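/* A note on the "a2 = (int32_t)a2; FALLTHRU" pattern used repeatedly in
   tcg_out_op above: 32-bit constant arguments arrive zero-extended, so
   e.g. add_i32 with a2 = 0xffffffff is first sign-extended to -1, which
   tcg_out_addsubi can then encode as SUBI #1 instead of failing to fit
   a 12-bit ADDI immediate. */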
2282
14e4c1e2
RH
2283static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2284 unsigned vecl, unsigned vece,
5e8892db
MR
2285 const TCGArg args[TCG_MAX_OP_ARGS],
2286 const int const_args[TCG_MAX_OP_ARGS])
14e4c1e2 2287{
d81bad24 2288 static const AArch64Insn cmp_vec_insn[16] = {
14e4c1e2
RH
2289 [TCG_COND_EQ] = I3616_CMEQ,
2290 [TCG_COND_GT] = I3616_CMGT,
2291 [TCG_COND_GE] = I3616_CMGE,
2292 [TCG_COND_GTU] = I3616_CMHI,
2293 [TCG_COND_GEU] = I3616_CMHS,
2294 };
d81bad24
RH
2295 static const AArch64Insn cmp_scalar_insn[16] = {
2296 [TCG_COND_EQ] = I3611_CMEQ,
2297 [TCG_COND_GT] = I3611_CMGT,
2298 [TCG_COND_GE] = I3611_CMGE,
2299 [TCG_COND_GTU] = I3611_CMHI,
2300 [TCG_COND_GEU] = I3611_CMHS,
2301 };
2302 static const AArch64Insn cmp0_vec_insn[16] = {
14e4c1e2
RH
2303 [TCG_COND_EQ] = I3617_CMEQ0,
2304 [TCG_COND_GT] = I3617_CMGT0,
2305 [TCG_COND_GE] = I3617_CMGE0,
2306 [TCG_COND_LT] = I3617_CMLT0,
2307 [TCG_COND_LE] = I3617_CMLE0,
2308 };
d81bad24
RH
2309 static const AArch64Insn cmp0_scalar_insn[16] = {
2310 [TCG_COND_EQ] = I3612_CMEQ0,
2311 [TCG_COND_GT] = I3612_CMGT0,
2312 [TCG_COND_GE] = I3612_CMGE0,
2313 [TCG_COND_LT] = I3612_CMLT0,
2314 [TCG_COND_LE] = I3612_CMLE0,
2315 };
14e4c1e2
RH
2316
2317 TCGType type = vecl + TCG_TYPE_V64;
2318 unsigned is_q = vecl;
d81bad24 2319 bool is_scalar = !is_q && vece == MO_64;
a9e434a5 2320 TCGArg a0, a1, a2, a3;
9e27f58b 2321 int cmode, imm8;
14e4c1e2
RH
2322
2323 a0 = args[0];
2324 a1 = args[1];
2325 a2 = args[2];
2326
2327 switch (opc) {
2328 case INDEX_op_ld_vec:
2329 tcg_out_ld(s, type, a0, a1, a2);
2330 break;
2331 case INDEX_op_st_vec:
2332 tcg_out_st(s, type, a0, a1, a2);
2333 break;
37ee55a0
RH
2334 case INDEX_op_dupm_vec:
2335 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2336 break;
14e4c1e2 2337 case INDEX_op_add_vec:
d81bad24
RH
2338 if (is_scalar) {
2339 tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2340 } else {
2341 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2342 }
14e4c1e2
RH
2343 break;
2344 case INDEX_op_sub_vec:
d81bad24
RH
2345 if (is_scalar) {
2346 tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2347 } else {
2348 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2349 }
14e4c1e2
RH
2350 break;
2351 case INDEX_op_mul_vec:
2352 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2353 break;
2354 case INDEX_op_neg_vec:
d81bad24
RH
2355 if (is_scalar) {
2356 tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2357 } else {
2358 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2359 }
14e4c1e2 2360 break;
a456394a 2361 case INDEX_op_abs_vec:
d81bad24
RH
2362 if (is_scalar) {
2363 tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2364 } else {
2365 tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2366 }
a456394a 2367 break;
14e4c1e2 2368 case INDEX_op_and_vec:
9e27f58b
RH
2369 if (const_args[2]) {
2370 is_shimm1632(~a2, &cmode, &imm8);
2371 if (a0 == a1) {
2372 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2373 return;
2374 }
2375 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2376 a2 = a0;
2377 }
14e4c1e2
RH
2378 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2379 break;
2380 case INDEX_op_or_vec:
9e27f58b
RH
2381 if (const_args[2]) {
2382 is_shimm1632(a2, &cmode, &imm8);
2383 if (a0 == a1) {
2384 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2385 return;
2386 }
2387 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2388 a2 = a0;
2389 }
14e4c1e2
RH
2390 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2391 break;
14e4c1e2 2392 case INDEX_op_andc_vec:
9e27f58b
RH
2393 if (const_args[2]) {
2394 is_shimm1632(a2, &cmode, &imm8);
2395 if (a0 == a1) {
2396 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2397 return;
2398 }
2399 tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2400 a2 = a0;
2401 }
14e4c1e2
RH
2402 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2403 break;
2404 case INDEX_op_orc_vec:
9e27f58b
RH
2405 if (const_args[2]) {
2406 is_shimm1632(~a2, &cmode, &imm8);
2407 if (a0 == a1) {
2408 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2409 return;
2410 }
2411 tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2412 a2 = a0;
2413 }
14e4c1e2
RH
2414 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2415 break;
9e27f58b
RH
2416 case INDEX_op_xor_vec:
2417 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2418 break;
d32648d4 2419 case INDEX_op_ssadd_vec:
d81bad24
RH
2420 if (is_scalar) {
2421 tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2422 } else {
2423 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2424 }
d32648d4
RH
2425 break;
2426 case INDEX_op_sssub_vec:
d81bad24
RH
2427 if (is_scalar) {
2428 tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2429 } else {
2430 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2431 }
d32648d4
RH
2432 break;
2433 case INDEX_op_usadd_vec:
d81bad24
RH
2434 if (is_scalar) {
2435 tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2436 } else {
2437 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2438 }
d32648d4
RH
2439 break;
2440 case INDEX_op_ussub_vec:
d81bad24
RH
2441 if (is_scalar) {
2442 tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2443 } else {
2444 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2445 }
d32648d4 2446 break;
93f332a5
RH
2447 case INDEX_op_smax_vec:
2448 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2449 break;
2450 case INDEX_op_smin_vec:
2451 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2452 break;
2453 case INDEX_op_umax_vec:
2454 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2455 break;
2456 case INDEX_op_umin_vec:
2457 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2458 break;
14e4c1e2
RH
2459 case INDEX_op_not_vec:
2460 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2461 break;
14e4c1e2 2462 case INDEX_op_shli_vec:
d81bad24
RH
2463 if (is_scalar) {
2464 tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2465 } else {
2466 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2467 }
14e4c1e2
RH
2468 break;
2469 case INDEX_op_shri_vec:
d81bad24
RH
2470 if (is_scalar) {
2471 tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2472 } else {
2473 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2474 }
14e4c1e2
RH
2475 break;
2476 case INDEX_op_sari_vec:
d81bad24
RH
2477 if (is_scalar) {
2478 tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2479 } else {
2480 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2481 }
14e4c1e2 2482 break;
7cff8988 2483 case INDEX_op_aa64_sli_vec:
d81bad24
RH
2484 if (is_scalar) {
2485 tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2486 } else {
2487 tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2488 }
7cff8988 2489 break;
79525dfd 2490 case INDEX_op_shlv_vec:
d81bad24
RH
2491 if (is_scalar) {
2492 tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2493 } else {
2494 tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2495 }
79525dfd
RH
2496 break;
2497 case INDEX_op_aa64_sshl_vec:
d81bad24
RH
2498 if (is_scalar) {
2499 tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2500 } else {
2501 tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2502 }
79525dfd 2503 break;
14e4c1e2
RH
2504 case INDEX_op_cmp_vec:
2505 {
2506 TCGCond cond = args[3];
2507 AArch64Insn insn;
2508
2509 if (cond == TCG_COND_NE) {
2510 if (const_args[2]) {
d81bad24
RH
2511 if (is_scalar) {
2512 tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2513 } else {
2514 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2515 }
14e4c1e2 2516 } else {
d81bad24
RH
2517 if (is_scalar) {
2518 tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2519 } else {
2520 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2521 }
14e4c1e2
RH
2522 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2523 }
2524 } else {
2525 if (const_args[2]) {
d81bad24
RH
2526 if (is_scalar) {
2527 insn = cmp0_scalar_insn[cond];
2528 if (insn) {
2529 tcg_out_insn_3612(s, insn, vece, a0, a1);
2530 break;
2531 }
2532 } else {
2533 insn = cmp0_vec_insn[cond];
2534 if (insn) {
2535 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2536 break;
2537 }
14e4c1e2 2538 }
4e186175 2539 tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
14e4c1e2
RH
2540 a2 = TCG_VEC_TMP;
2541 }
d81bad24
RH
2542 if (is_scalar) {
2543 insn = cmp_scalar_insn[cond];
2544 if (insn == 0) {
2545 TCGArg t;
2546 t = a1, a1 = a2, a2 = t;
2547 cond = tcg_swap_cond(cond);
2548 insn = cmp_scalar_insn[cond];
2549 tcg_debug_assert(insn != 0);
2550 }
2551 tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2552 } else {
2553 insn = cmp_vec_insn[cond];
2554 if (insn == 0) {
2555 TCGArg t;
2556 t = a1, a1 = a2, a2 = t;
2557 cond = tcg_swap_cond(cond);
2558 insn = cmp_vec_insn[cond];
2559 tcg_debug_assert(insn != 0);
2560 }
2561 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
14e4c1e2 2562 }
14e4c1e2
RH
2563 }
2564 }
2565 break;
bab1671f 2566
a9e434a5
RH
2567 case INDEX_op_bitsel_vec:
2568 a3 = args[3];
2569 if (a0 == a3) {
2570 tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2571 } else if (a0 == a2) {
2572 tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2573 } else {
2574 if (a0 != a1) {
2575 tcg_out_mov(s, type, a0, a1);
2576 }
2577 tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2578 }
2579 break;
2580
bab1671f 2581 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
bab1671f 2582 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
14e4c1e2
RH
2583 default:
2584 g_assert_not_reached();
2585 }
2586}
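/* The comparison tables above are sparse on purpose: AArch64 provides
   only the EQ/GT/GE/HI/HS register-register compares, so a condition
   with no table entry is handled by swapping the operands and the
   condition with tcg_swap_cond and retrying the lookup. */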
2587
2588int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2589{
2590 switch (opc) {
2591 case INDEX_op_add_vec:
2592 case INDEX_op_sub_vec:
14e4c1e2
RH
2593 case INDEX_op_and_vec:
2594 case INDEX_op_or_vec:
2595 case INDEX_op_xor_vec:
2596 case INDEX_op_andc_vec:
2597 case INDEX_op_orc_vec:
2598 case INDEX_op_neg_vec:
a456394a 2599 case INDEX_op_abs_vec:
14e4c1e2
RH
2600 case INDEX_op_not_vec:
2601 case INDEX_op_cmp_vec:
2602 case INDEX_op_shli_vec:
2603 case INDEX_op_shri_vec:
2604 case INDEX_op_sari_vec:
d32648d4
RH
2605 case INDEX_op_ssadd_vec:
2606 case INDEX_op_sssub_vec:
2607 case INDEX_op_usadd_vec:
2608 case INDEX_op_ussub_vec:
79525dfd 2609 case INDEX_op_shlv_vec:
a9e434a5 2610 case INDEX_op_bitsel_vec:
14e4c1e2 2611 return 1;
7cff8988 2612 case INDEX_op_rotli_vec:
79525dfd
RH
2613 case INDEX_op_shrv_vec:
2614 case INDEX_op_sarv_vec:
7cff8988
RH
2615 case INDEX_op_rotlv_vec:
2616 case INDEX_op_rotrv_vec:
79525dfd 2617 return -1;
e65a5f22 2618 case INDEX_op_mul_vec:
a7b6d286
RH
2619 case INDEX_op_smax_vec:
2620 case INDEX_op_smin_vec:
2621 case INDEX_op_umax_vec:
2622 case INDEX_op_umin_vec:
e65a5f22 2623 return vece < MO_64;
14e4c1e2
RH
2624
2625 default:
2626 return 0;
2627 }
2628}
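/* The return values follow the usual TCG convention: 1 means the op is
   emitted directly, -1 means it is accepted but lowered by
   tcg_expand_vec_op below, and 0 rejects it. */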
2629
2630void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2631 TCGArg a0, ...)
2632{
79525dfd 2633 va_list va;
10061ffe 2634 TCGv_vec v0, v1, v2, t1, t2, c1;
7cff8988 2635 TCGArg a2;
79525dfd
RH
2636
2637 va_start(va, a0);
2638 v0 = temp_tcgv_vec(arg_temp(a0));
2639 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
7cff8988 2640 a2 = va_arg(va, TCGArg);
2dfa2f18 2641 va_end(va);
79525dfd
RH
2642
2643 switch (opc) {
7cff8988
RH
2644 case INDEX_op_rotli_vec:
2645 t1 = tcg_temp_new_vec(type);
2646 tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2647 vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2648 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2649 tcg_temp_free_vec(t1);
2650 break;
2651
79525dfd
RH
2652 case INDEX_op_shrv_vec:
2653 case INDEX_op_sarv_vec:
2654 /* Right shifts are negative left shifts for AArch64. */
2dfa2f18 2655 v2 = temp_tcgv_vec(arg_temp(a2));
79525dfd
RH
2656 t1 = tcg_temp_new_vec(type);
2657 tcg_gen_neg_vec(vece, t1, v2);
2658 opc = (opc == INDEX_op_shrv_vec
2659 ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2660 vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2661 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2662 tcg_temp_free_vec(t1);
2663 break;
2664
7cff8988 2665 case INDEX_op_rotlv_vec:
2dfa2f18 2666 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988 2667 t1 = tcg_temp_new_vec(type);
10061ffe
RH
2668 c1 = tcg_constant_vec(type, vece, 8 << vece);
2669 tcg_gen_sub_vec(vece, t1, v2, c1);
7cff8988
RH
2670 /* Right shifts are negative left shifts for AArch64. */
2671 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2672 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2673 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2674 tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2675 tcg_gen_or_vec(vece, v0, v0, t1);
2676 tcg_temp_free_vec(t1);
2677 break;
2678
2679 case INDEX_op_rotrv_vec:
2dfa2f18 2680 v2 = temp_tcgv_vec(arg_temp(a2));
7cff8988
RH
2681 t1 = tcg_temp_new_vec(type);
2682 t2 = tcg_temp_new_vec(type);
10061ffe 2683 c1 = tcg_constant_vec(type, vece, 8 << vece);
7cff8988 2684 tcg_gen_neg_vec(vece, t1, v2);
10061ffe 2685 tcg_gen_sub_vec(vece, t2, c1, v2);
7cff8988
RH
2686 /* Right shifts are negative left shifts for AArch64. */
2687 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2688 tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2689 vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2690 tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2691 tcg_gen_or_vec(vece, v0, t1, t2);
2692 tcg_temp_free_vec(t1);
2693 tcg_temp_free_vec(t2);
2694 break;
2695
79525dfd
RH
2696 default:
2697 g_assert_not_reached();
2698 }
14e4c1e2
RH
2699}
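/* The rotate expansions above rely on AArch64 variable shifts (USHL)
   treating a negative count as a right shift: rotlv computes
   (v1 << v2) | (v1 << (v2 - width)), where the second shift amount is
   negative and therefore shifts right by (width - v2). */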
2700
39e7522b
RH
2701static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2702{
1897cc2e
RH
2703 switch (op) {
2704 case INDEX_op_goto_ptr:
39e7522b 2705 return C_O0_I1(r);
f69d277e 2706
1897cc2e
RH
2707 case INDEX_op_ld8u_i32:
2708 case INDEX_op_ld8s_i32:
2709 case INDEX_op_ld16u_i32:
2710 case INDEX_op_ld16s_i32:
2711 case INDEX_op_ld_i32:
2712 case INDEX_op_ld8u_i64:
2713 case INDEX_op_ld8s_i64:
2714 case INDEX_op_ld16u_i64:
2715 case INDEX_op_ld16s_i64:
2716 case INDEX_op_ld32u_i64:
2717 case INDEX_op_ld32s_i64:
2718 case INDEX_op_ld_i64:
2719 case INDEX_op_neg_i32:
2720 case INDEX_op_neg_i64:
2721 case INDEX_op_not_i32:
2722 case INDEX_op_not_i64:
2723 case INDEX_op_bswap16_i32:
2724 case INDEX_op_bswap32_i32:
2725 case INDEX_op_bswap16_i64:
2726 case INDEX_op_bswap32_i64:
2727 case INDEX_op_bswap64_i64:
2728 case INDEX_op_ext8s_i32:
2729 case INDEX_op_ext16s_i32:
2730 case INDEX_op_ext8u_i32:
2731 case INDEX_op_ext16u_i32:
2732 case INDEX_op_ext8s_i64:
2733 case INDEX_op_ext16s_i64:
2734 case INDEX_op_ext32s_i64:
2735 case INDEX_op_ext8u_i64:
2736 case INDEX_op_ext16u_i64:
2737 case INDEX_op_ext32u_i64:
2738 case INDEX_op_ext_i32_i64:
2739 case INDEX_op_extu_i32_i64:
2740 case INDEX_op_extract_i32:
2741 case INDEX_op_extract_i64:
2742 case INDEX_op_sextract_i32:
2743 case INDEX_op_sextract_i64:
39e7522b 2744 return C_O1_I1(r, r);
1897cc2e
RH
2745
2746 case INDEX_op_st8_i32:
2747 case INDEX_op_st16_i32:
2748 case INDEX_op_st_i32:
2749 case INDEX_op_st8_i64:
2750 case INDEX_op_st16_i64:
2751 case INDEX_op_st32_i64:
2752 case INDEX_op_st_i64:
39e7522b 2753 return C_O0_I2(rZ, r);
1897cc2e
RH
2754
2755 case INDEX_op_add_i32:
2756 case INDEX_op_add_i64:
2757 case INDEX_op_sub_i32:
2758 case INDEX_op_sub_i64:
2759 case INDEX_op_setcond_i32:
2760 case INDEX_op_setcond_i64:
39e7522b 2761 return C_O1_I2(r, r, rA);
1897cc2e
RH
2762
2763 case INDEX_op_mul_i32:
2764 case INDEX_op_mul_i64:
2765 case INDEX_op_div_i32:
2766 case INDEX_op_div_i64:
2767 case INDEX_op_divu_i32:
2768 case INDEX_op_divu_i64:
2769 case INDEX_op_rem_i32:
2770 case INDEX_op_rem_i64:
2771 case INDEX_op_remu_i32:
2772 case INDEX_op_remu_i64:
2773 case INDEX_op_muluh_i64:
2774 case INDEX_op_mulsh_i64:
39e7522b 2775 return C_O1_I2(r, r, r);
1897cc2e
RH
2776
2777 case INDEX_op_and_i32:
2778 case INDEX_op_and_i64:
2779 case INDEX_op_or_i32:
2780 case INDEX_op_or_i64:
2781 case INDEX_op_xor_i32:
2782 case INDEX_op_xor_i64:
2783 case INDEX_op_andc_i32:
2784 case INDEX_op_andc_i64:
2785 case INDEX_op_orc_i32:
2786 case INDEX_op_orc_i64:
2787 case INDEX_op_eqv_i32:
2788 case INDEX_op_eqv_i64:
39e7522b 2789 return C_O1_I2(r, r, rL);
1897cc2e
RH
2790
2791 case INDEX_op_shl_i32:
2792 case INDEX_op_shr_i32:
2793 case INDEX_op_sar_i32:
2794 case INDEX_op_rotl_i32:
2795 case INDEX_op_rotr_i32:
2796 case INDEX_op_shl_i64:
2797 case INDEX_op_shr_i64:
2798 case INDEX_op_sar_i64:
2799 case INDEX_op_rotl_i64:
2800 case INDEX_op_rotr_i64:
39e7522b 2801 return C_O1_I2(r, r, ri);
1897cc2e
RH
2802
2803 case INDEX_op_clz_i32:
2804 case INDEX_op_ctz_i32:
2805 case INDEX_op_clz_i64:
2806 case INDEX_op_ctz_i64:
39e7522b 2807 return C_O1_I2(r, r, rAL);
1897cc2e
RH
2808
2809 case INDEX_op_brcond_i32:
2810 case INDEX_op_brcond_i64:
39e7522b 2811 return C_O0_I2(r, rA);
1897cc2e
RH
2812
2813 case INDEX_op_movcond_i32:
2814 case INDEX_op_movcond_i64:
39e7522b 2815 return C_O1_I4(r, r, rA, rZ, rZ);
1897cc2e
RH
2816
2817 case INDEX_op_qemu_ld_i32:
2818 case INDEX_op_qemu_ld_i64:
39e7522b 2819 return C_O1_I1(r, l);
1897cc2e
RH
2820 case INDEX_op_qemu_st_i32:
2821 case INDEX_op_qemu_st_i64:
39e7522b 2822 return C_O0_I2(lZ, l);
1897cc2e
RH
2823
2824 case INDEX_op_deposit_i32:
2825 case INDEX_op_deposit_i64:
39e7522b 2826 return C_O1_I2(r, 0, rZ);
1897cc2e 2827
464c2969
RH
2828 case INDEX_op_extract2_i32:
2829 case INDEX_op_extract2_i64:
39e7522b 2830 return C_O1_I2(r, rZ, rZ);
464c2969 2831
1897cc2e
RH
2832 case INDEX_op_add2_i32:
2833 case INDEX_op_add2_i64:
2834 case INDEX_op_sub2_i32:
2835 case INDEX_op_sub2_i64:
39e7522b 2836 return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
1897cc2e 2837
14e4c1e2
RH
2838 case INDEX_op_add_vec:
2839 case INDEX_op_sub_vec:
2840 case INDEX_op_mul_vec:
14e4c1e2 2841 case INDEX_op_xor_vec:
d32648d4
RH
2842 case INDEX_op_ssadd_vec:
2843 case INDEX_op_sssub_vec:
2844 case INDEX_op_usadd_vec:
2845 case INDEX_op_ussub_vec:
93f332a5
RH
2846 case INDEX_op_smax_vec:
2847 case INDEX_op_smin_vec:
2848 case INDEX_op_umax_vec:
2849 case INDEX_op_umin_vec:
79525dfd
RH
2850 case INDEX_op_shlv_vec:
2851 case INDEX_op_shrv_vec:
2852 case INDEX_op_sarv_vec:
2853 case INDEX_op_aa64_sshl_vec:
39e7522b 2854 return C_O1_I2(w, w, w);
14e4c1e2
RH
2855 case INDEX_op_not_vec:
2856 case INDEX_op_neg_vec:
a456394a 2857 case INDEX_op_abs_vec:
14e4c1e2
RH
2858 case INDEX_op_shli_vec:
2859 case INDEX_op_shri_vec:
2860 case INDEX_op_sari_vec:
39e7522b 2861 return C_O1_I1(w, w);
14e4c1e2 2862 case INDEX_op_ld_vec:
37ee55a0 2863 case INDEX_op_dupm_vec:
39e7522b
RH
2864 return C_O1_I1(w, r);
2865 case INDEX_op_st_vec:
2866 return C_O0_I2(w, r);
14e4c1e2 2867 case INDEX_op_dup_vec:
39e7522b 2868 return C_O1_I1(w, wr);
9e27f58b
RH
2869 case INDEX_op_or_vec:
2870 case INDEX_op_andc_vec:
39e7522b 2871 return C_O1_I2(w, w, wO);
9e27f58b
RH
2872 case INDEX_op_and_vec:
2873 case INDEX_op_orc_vec:
39e7522b 2874 return C_O1_I2(w, w, wN);
14e4c1e2 2875 case INDEX_op_cmp_vec:
39e7522b 2876 return C_O1_I2(w, w, wZ);
a9e434a5 2877 case INDEX_op_bitsel_vec:
39e7522b 2878 return C_O1_I3(w, w, w, w);
7cff8988 2879 case INDEX_op_aa64_sli_vec:
39e7522b 2880 return C_O1_I2(w, 0, w);
14e4c1e2 2881
1897cc2e 2882 default:
39e7522b 2883 g_assert_not_reached();
f69d277e 2884 }
f69d277e
RH
2885}
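/* Constraint letters used above, per this backend's definitions: 'r' is
   any general register, 'w' any vector register, 'l' the subset usable
   for qemu_ld/st addressing, 'Z' the zero register standing in for a
   constant 0, 'A' arithmetic (12-bit) immediates, 'L' logical bitmask
   immediates, 'M' the constant -1, and 'O'/'N' vector immediates
   encodable directly or via their complement, respectively. */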
2886
4a136e0a
CF
2887static void tcg_target_init(TCGContext *s)
2888{
f46934df
RH
2889 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2890 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
14e4c1e2
RH
2891 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2892 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
f46934df 2893
14e4c1e2 2894 tcg_target_call_clobber_regs = -1ull;
f46934df
RH
2895 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2896 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2897 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2898 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2899 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2900 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2901 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2902 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2903 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2904 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2905 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
14e4c1e2
RH
2906 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2907 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2908 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2909 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2910 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2911 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2912 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2913 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4a136e0a 2914
ccb1bb66 2915 s->reserved_regs = 0;
4a136e0a
CF
2916 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2917 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2918 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2919 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
14e4c1e2 2920 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
4a136e0a
CF
2921}
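/* The regset layout mirrors tcg_target_reg_alloc_order: general
   registers occupy bits 0-31 and vector registers bits 32-63, hence the
   0xffffffff00000000ull masks for V64/V128 above.  Any register not
   explicitly reset remains call-clobbered. */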
2922
38d195aa
RH
2923/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2924#define PUSH_SIZE ((30 - 19 + 1) * 8)
2925
2926#define FRAME_SIZE \
2927 ((PUSH_SIZE \
2928 + TCG_STATIC_CALL_ARGS_SIZE \
2929 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2930 + TCG_TARGET_STACK_ALIGN - 1) \
2931 & ~(TCG_TARGET_STACK_ALIGN - 1))
2932
2933/* We're expecting a 2-byte uleb128 encoded value. */
2934QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2935
2936/* We're expecting to use a single ADDI insn. */
2937QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2938
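/* Worked example: PUSH_SIZE covers the six stored pairs x19..x30, i.e.
   (30 - 19 + 1) * 8 = 96 bytes; FRAME_SIZE then rounds
   96 + TCG_STATIC_CALL_ARGS_SIZE + CPU_TEMP_BUF_NLONGS * sizeof(long)
   up to TCG_TARGET_STACK_ALIGN. */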
4a136e0a
CF
2939static void tcg_target_qemu_prologue(TCGContext *s)
2940{
4a136e0a
CF
2941 TCGReg r;
2942
95f72aa9
RH
2943 /* Push (FP, LR) and allocate space for all saved registers. */
2944 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2945 TCG_REG_SP, -PUSH_SIZE, 1, 1);
4a136e0a 2946
d82b78e4 2947 /* Set up frame pointer for canonical unwinding. */
929f8b55 2948 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
4a136e0a 2949
d82b78e4 2950 /* Store callee-preserved regs x19..x28. */
4a136e0a 2951 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2952 int ofs = (r - TCG_REG_X19 + 2) * 8;
2953 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2954 }
2955
096c46c0
RH
2956 /* Make stack space for TCG locals. */
2957 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2958 FRAME_SIZE - PUSH_SIZE);
096c46c0 2959
95f72aa9 2960 /* Inform TCG about how to find TCG locals with register, offset, size. */
4a136e0a
CF
2961 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2962 CPU_TEMP_BUF_NLONGS * sizeof(long));
2963
4cbea598 2964#if !defined(CONFIG_SOFTMMU)
352bcb0a 2965 if (USE_GUEST_BASE) {
b76f21a7 2966 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
6a91c7c9
JK
2967 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2968 }
2969#endif
2970
4a136e0a 2971 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
81d8a5ee 2972 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
4a136e0a 2973
b19f0c2e
RH
2974 /*
2975 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2976 * and fall through to the rest of the epilogue.
2977 */
c8bc1168 2978 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
b19f0c2e
RH
2979 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2980
2981 /* TB epilogue */
ffba3eb3 2982 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
4a136e0a 2983
096c46c0
RH
2984 /* Remove TCG locals stack space. */
2985 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
38d195aa 2986 FRAME_SIZE - PUSH_SIZE);
4a136e0a 2987
95f72aa9 2988 /* Restore registers x19..x28. */
4a136e0a 2989 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
95f72aa9
RH
2990 int ofs = (r - TCG_REG_X19 + 2) * 8;
2991 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
4a136e0a
CF
2992 }
2993
95f72aa9
RH
2994 /* Pop (FP, LR), restore SP to previous frame. */
2995 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
38d195aa 2996 TCG_REG_SP, PUSH_SIZE, 0, 1);
81d8a5ee 2997 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
4a136e0a 2998}
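/* Resulting frame, from the post-prologue SP upward: the
   TCG_STATIC_CALL_ARGS_SIZE bytes of outgoing call arguments, the
   CPU_TEMP_BUF_NLONGS temporaries registered with tcg_set_frame,
   alignment padding, and finally the saved-register block with (fp, lr)
   at its base and (x27, x28) at +80. */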
38d195aa 2999
55129955
RH
3000static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3001{
3002 int i;
3003 for (i = 0; i < count; ++i) {
3004 p[i] = NOP;
3005 }
3006}
3007
38d195aa 3008typedef struct {
3d9bddb3 3009 DebugFrameHeader h;
38d195aa
RH
3010 uint8_t fde_def_cfa[4];
3011 uint8_t fde_reg_ofs[24];
3012} DebugFrame;
3013
3014#define ELF_HOST_MACHINE EM_AARCH64
3015
3d9bddb3
RH
3016static const DebugFrame debug_frame = {
3017 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3018 .h.cie.id = -1,
3019 .h.cie.version = 1,
3020 .h.cie.code_align = 1,
3021 .h.cie.data_align = 0x78, /* sleb128 -8 */
3022 .h.cie.return_column = TCG_REG_LR,
38d195aa
RH
3023
3024 /* Total FDE size does not include the "len" member. */
3d9bddb3 3025 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
38d195aa
RH
3026
3027 .fde_def_cfa = {
3028 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
3029 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3030 (FRAME_SIZE >> 7)
3031 },
3032 .fde_reg_ofs = {
3033 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
3034 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
3035 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
3036 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
3037 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
3038 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
3039 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
3040 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
3041 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
3042 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
3043 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
3044 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
3045 }
3046};
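/* Decoding aid for fde_reg_ofs above: 0x80 + n encodes DW_CFA_offset
   for register n, and the following uleb128 operand is scaled by
   data_align (-8), so "0x80 + 28, 1" records x28 saved at CFA - 8. */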
3047
755bf9e5 3048void tcg_register_jit(const void *buf, size_t buf_size)
38d195aa 3049{
38d195aa
RH
3050 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3051}