1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
91 }
92
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94 {
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
101 }
102
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
105 {
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
115 }
116 }
117
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
122
123 /* parse target specific constraints */
124 static const char *target_parse_constraint(TCGArgConstraint *ct,
125 const char *ct_str, TCGType type)
126 {
127 switch (*ct_str++) {
128 case 'r': /* general registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffffu;
131 break;
132 case 'w': /* advsimd registers */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs |= 0xffffffff00000000ull;
135 break;
136 case 'l': /* qemu_ld / qemu_st address, data_reg */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs = 0xffffffffu;
139 #ifdef CONFIG_SOFTMMU
140 /* x0 and x1 will be overwritten when reading the tlb entry, and x2
141 and x3 are needed for the helper args; better to avoid using them. */
142 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
143 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
146 #endif
147 break;
148 case 'A': /* Valid for arithmetic immediate (positive or negative). */
149 ct->ct |= TCG_CT_CONST_AIMM;
150 break;
151 case 'L': /* Valid for logical immediate. */
152 ct->ct |= TCG_CT_CONST_LIMM;
153 break;
154 case 'M': /* minus one */
155 ct->ct |= TCG_CT_CONST_MONE;
156 break;
157 case 'Z': /* zero */
158 ct->ct |= TCG_CT_CONST_ZERO;
159 break;
160 default:
161 return NULL;
162 }
163 return ct_str;
164 }
165
166 /* Match a constant valid for addition (12-bit, optionally shifted). */
167 static inline bool is_aimm(uint64_t val)
168 {
169 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
170 }
171
172 /* Match a constant valid for logical operations. */
173 static inline bool is_limm(uint64_t val)
174 {
175 /* Taking a simplified view of the logical immediates for now, ignoring
176 the replication that can happen across the field. Match bit patterns
177 of the forms
178 0....01....1
179 0..01..10..0
180 and their inverses. */
181
182 /* Make things easier below, by testing the form with msb clear. */
183 if ((int64_t)val < 0) {
184 val = ~val;
185 }
186 if (val == 0) {
187 return false;
188 }
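/* Adding the lowest set bit to VAL clears its low run of consecutive ones;
   if the result then has at most one bit set, the original value was a
   single contiguous run, e.g. 0x0ff0 + 0x0010 = 0x1000. */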
189 val += val & -val;
190 return (val & (val - 1)) == 0;
191 }
192
193 /* Match a constant that is valid for vectors. */
194 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
195 {
196 int i;
197
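/* On success, *OP, *CMODE and *IMM8 are the fields of the AdvSIMD
   modified-immediate encoding (used with I3606_MOVI below) that
   reproduce V64. */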
198 *op = 0;
199 /* Match replication across 8 bits. */
200 if (v64 == dup_const(MO_8, v64)) {
201 *cmode = 0xe;
202 *imm8 = v64 & 0xff;
203 return true;
204 }
205 /* Match replication across 16 bits. */
206 if (v64 == dup_const(MO_16, v64)) {
207 uint16_t v16 = v64;
208
209 if (v16 == (v16 & 0xff)) {
210 *cmode = 0x8;
211 *imm8 = v16 & 0xff;
212 return true;
213 } else if (v16 == (v16 & 0xff00)) {
214 *cmode = 0xa;
215 *imm8 = v16 >> 8;
216 return true;
217 }
218 }
219 /* Match replication across 32 bits. */
220 if (v64 == dup_const(MO_32, v64)) {
221 uint32_t v32 = v64;
222
223 if (v32 == (v32 & 0xff)) {
224 *cmode = 0x0;
225 *imm8 = v32 & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff00)) {
228 *cmode = 0x2;
229 *imm8 = (v32 >> 8) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff0000)) {
232 *cmode = 0x4;
233 *imm8 = (v32 >> 16) & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff000000)) {
236 *cmode = 0x6;
237 *imm8 = v32 >> 24;
238 return true;
239 } else if ((v32 & 0xffff00ff) == 0xff) {
240 *cmode = 0xc;
241 *imm8 = (v32 >> 8) & 0xff;
242 return true;
243 } else if ((v32 & 0xff00ffff) == 0xffff) {
244 *cmode = 0xd;
245 *imm8 = (v32 >> 16) & 0xff;
246 return true;
247 }
248 /* Match forms of a float32. */
249 if (extract32(v32, 0, 19) == 0
250 && (extract32(v32, 25, 6) == 0x20
251 || extract32(v32, 25, 6) == 0x1f)) {
252 *cmode = 0xf;
253 *imm8 = (extract32(v32, 31, 1) << 7)
254 | (extract32(v32, 25, 1) << 6)
255 | extract32(v32, 19, 6);
256 return true;
257 }
258 }
259 /* Match forms of a float64. */
260 if (extract64(v64, 0, 48) == 0
261 && (extract64(v64, 54, 9) == 0x100
262 || extract64(v64, 54, 9) == 0x0ff)) {
263 *cmode = 0xf;
264 *op = 1;
265 *imm8 = (extract64(v64, 63, 1) << 7)
266 | (extract64(v64, 54, 1) << 6)
267 | extract64(v64, 48, 6);
268 return true;
269 }
270 /* Match bytes of 0x00 and 0xff. */
271 for (i = 0; i < 64; i += 8) {
272 uint64_t byte = extract64(v64, i, 8);
273 if (byte != 0 && byte != 0xff) {
274 break;
275 }
276 }
277 if (i == 64) {
278 *cmode = 0xe;
279 *op = 1;
280 *imm8 = (extract64(v64, 0, 1) << 0)
281 | (extract64(v64, 8, 1) << 1)
282 | (extract64(v64, 16, 1) << 2)
283 | (extract64(v64, 24, 1) << 3)
284 | (extract64(v64, 32, 1) << 4)
285 | (extract64(v64, 40, 1) << 5)
286 | (extract64(v64, 48, 1) << 6)
287 | (extract64(v64, 56, 1) << 7);
288 return true;
289 }
290 return false;
291 }
292
293 static int tcg_target_const_match(tcg_target_long val, TCGType type,
294 const TCGArgConstraint *arg_ct)
295 {
296 int ct = arg_ct->ct;
297
298 if (ct & TCG_CT_CONST) {
299 return 1;
300 }
301 if (type == TCG_TYPE_I32) {
302 val = (int32_t)val;
303 }
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
306 }
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
309 }
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
312 }
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
315 }
316
317 return 0;
318 }
319
320 enum aarch64_cond_code {
321 COND_EQ = 0x0,
322 COND_NE = 0x1,
323 COND_CS = 0x2, /* Unsigned greater or equal */
324 COND_HS = COND_CS, /* ALIAS greater or equal */
325 COND_CC = 0x3, /* Unsigned less than */
326 COND_LO = COND_CC, /* ALIAS Lower */
327 COND_MI = 0x4, /* Negative */
328 COND_PL = 0x5, /* Zero or greater */
329 COND_VS = 0x6, /* Overflow */
330 COND_VC = 0x7, /* No overflow */
331 COND_HI = 0x8, /* Unsigned greater than */
332 COND_LS = 0x9, /* Unsigned less or equal */
333 COND_GE = 0xa,
334 COND_LT = 0xb,
335 COND_GT = 0xc,
336 COND_LE = 0xd,
337 COND_AL = 0xe,
338 COND_NV = 0xf, /* behaves like COND_AL here */
339 };
340
341 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
342 [TCG_COND_EQ] = COND_EQ,
343 [TCG_COND_NE] = COND_NE,
344 [TCG_COND_LT] = COND_LT,
345 [TCG_COND_GE] = COND_GE,
346 [TCG_COND_LE] = COND_LE,
347 [TCG_COND_GT] = COND_GT,
348 /* unsigned */
349 [TCG_COND_LTU] = COND_LO,
350 [TCG_COND_GTU] = COND_HI,
351 [TCG_COND_GEU] = COND_HS,
352 [TCG_COND_LEU] = COND_LS,
353 };
354
355 typedef enum {
356 LDST_ST = 0, /* store */
357 LDST_LD = 1, /* load */
358 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
359 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
360 } AArch64LdstType;
361
362 /* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367 typedef enum {
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
371
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
374
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
378
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
383
384 /* Load literal for loading the address at pc-relative offset */
385 I3305_LDR = 0x58000000,
386 I3305_LDR_v64 = 0x5c000000,
387 I3305_LDR_v128 = 0x9c000000,
388
389 /* Load/store register. Described here as 3.3.12, but the helper
390 that emits them can transform to 3.3.10 or 3.3.13. */
391 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
392 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
393 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
394 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
395
396 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
397 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
398 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
399 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
400
401 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
402 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
403
404 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
405 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
406 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
407
408 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
409 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
410
411 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
412 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
413
414 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
415 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
416
417 I3312_TO_I3310 = 0x00200800,
418 I3312_TO_I3313 = 0x01000000,
419
420 /* Load/store register pair instructions. */
421 I3314_LDP = 0x28400000,
422 I3314_STP = 0x28000000,
423
424 /* Add/subtract immediate instructions. */
425 I3401_ADDI = 0x11000000,
426 I3401_ADDSI = 0x31000000,
427 I3401_SUBI = 0x51000000,
428 I3401_SUBSI = 0x71000000,
429
430 /* Bitfield instructions. */
431 I3402_BFM = 0x33000000,
432 I3402_SBFM = 0x13000000,
433 I3402_UBFM = 0x53000000,
434
435 /* Extract instruction. */
436 I3403_EXTR = 0x13800000,
437
438 /* Logical immediate instructions. */
439 I3404_ANDI = 0x12000000,
440 I3404_ORRI = 0x32000000,
441 I3404_EORI = 0x52000000,
442
443 /* Move wide immediate instructions. */
444 I3405_MOVN = 0x12800000,
445 I3405_MOVZ = 0x52800000,
446 I3405_MOVK = 0x72800000,
447
448 /* PC relative addressing instructions. */
449 I3406_ADR = 0x10000000,
450 I3406_ADRP = 0x90000000,
451
452 /* Add/subtract shifted register instructions (without a shift). */
453 I3502_ADD = 0x0b000000,
454 I3502_ADDS = 0x2b000000,
455 I3502_SUB = 0x4b000000,
456 I3502_SUBS = 0x6b000000,
457
458 /* Add/subtract shifted register instructions (with a shift). */
459 I3502S_ADD_LSL = I3502_ADD,
460
461 /* Add/subtract with carry instructions. */
462 I3503_ADC = 0x1a000000,
463 I3503_SBC = 0x5a000000,
464
465 /* Conditional select instructions. */
466 I3506_CSEL = 0x1a800000,
467 I3506_CSINC = 0x1a800400,
468 I3506_CSINV = 0x5a800000,
469 I3506_CSNEG = 0x5a800400,
470
471 /* Data-processing (1 source) instructions. */
472 I3507_CLZ = 0x5ac01000,
473 I3507_RBIT = 0x5ac00000,
474 I3507_REV16 = 0x5ac00400,
475 I3507_REV32 = 0x5ac00800,
476 I3507_REV64 = 0x5ac00c00,
477
478 /* Data-processing (2 source) instructions. */
479 I3508_LSLV = 0x1ac02000,
480 I3508_LSRV = 0x1ac02400,
481 I3508_ASRV = 0x1ac02800,
482 I3508_RORV = 0x1ac02c00,
483 I3508_SMULH = 0x9b407c00,
484 I3508_UMULH = 0x9bc07c00,
485 I3508_UDIV = 0x1ac00800,
486 I3508_SDIV = 0x1ac00c00,
487
488 /* Data-processing (3 source) instructions. */
489 I3509_MADD = 0x1b000000,
490 I3509_MSUB = 0x1b008000,
491
492 /* Logical shifted register instructions (without a shift). */
493 I3510_AND = 0x0a000000,
494 I3510_BIC = 0x0a200000,
495 I3510_ORR = 0x2a000000,
496 I3510_ORN = 0x2a200000,
497 I3510_EOR = 0x4a000000,
498 I3510_EON = 0x4a200000,
499 I3510_ANDS = 0x6a000000,
500
501 /* AdvSIMD copy */
502 I3605_DUP = 0x0e000400,
503 I3605_INS = 0x4e001c00,
504 I3605_UMOV = 0x0e003c00,
505
506 /* AdvSIMD modified immediate */
507 I3606_MOVI = 0x0f000400,
508
509 /* AdvSIMD shift by immediate */
510 I3614_SSHR = 0x0f000400,
511 I3614_SSRA = 0x0f001400,
512 I3614_SHL = 0x0f005400,
513 I3614_USHR = 0x2f000400,
514 I3614_USRA = 0x2f001400,
515
516 /* AdvSIMD three same. */
517 I3616_ADD = 0x0e208400,
518 I3616_AND = 0x0e201c00,
519 I3616_BIC = 0x0e601c00,
520 I3616_EOR = 0x2e201c00,
521 I3616_MUL = 0x0e209c00,
522 I3616_ORR = 0x0ea01c00,
523 I3616_ORN = 0x0ee01c00,
524 I3616_SUB = 0x2e208400,
525 I3616_CMGT = 0x0e203400,
526 I3616_CMGE = 0x0e203c00,
527 I3616_CMTST = 0x0e208c00,
528 I3616_CMHI = 0x2e203400,
529 I3616_CMHS = 0x2e203c00,
530 I3616_CMEQ = 0x2e208c00,
531
532 /* AdvSIMD two-reg misc. */
533 I3617_CMGT0 = 0x0e208800,
534 I3617_CMEQ0 = 0x0e209800,
535 I3617_CMLT0 = 0x0e20a800,
536 I3617_CMGE0 = 0x2e208800,
537 I3617_CMLE0 = 0x2e20a800,
538 I3617_NOT = 0x2e205800,
539 I3617_NEG = 0x2e20b800,
540
541 /* System instructions. */
542 NOP = 0xd503201f,
543 DMB_ISH = 0xd50338bf,
544 DMB_LD = 0x00000100,
545 DMB_ST = 0x00000200,
546 } AArch64Insn;
547
548 static inline uint32_t tcg_in32(TCGContext *s)
549 {
550 uint32_t v = *(uint32_t *)s->code_ptr;
551 return v;
552 }
553
554 /* Emit an opcode with "type-checking" of the format. */
555 #define tcg_out_insn(S, FMT, OP, ...) \
556 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
557
558 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
559 {
560 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
561 }
562
563 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
564 TCGReg rt, int imm19)
565 {
566 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
567 }
568
569 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
570 TCGCond c, int imm19)
571 {
572 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
573 }
574
575 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
576 {
577 tcg_out32(s, insn | (imm26 & 0x03ffffff));
578 }
579
580 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
581 {
582 tcg_out32(s, insn | rn << 5);
583 }
584
585 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
586 TCGReg r1, TCGReg r2, TCGReg rn,
587 tcg_target_long ofs, bool pre, bool w)
588 {
589 insn |= 1u << 31; /* ext */
590 insn |= pre << 24;
591 insn |= w << 23;
592
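/* The pair offset is a 7-bit signed immediate scaled by 8, hence the
   [-0x200, 0x200) range check; the scaled value lands in bits [21:15]. */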
593 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
594 insn |= (ofs & (0x7f << 3)) << (15 - 3);
595
596 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
597 }
598
599 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
600 TCGReg rd, TCGReg rn, uint64_t aimm)
601 {
602 if (aimm > 0xfff) {
603 tcg_debug_assert((aimm & 0xfff) == 0);
604 aimm >>= 12;
605 tcg_debug_assert(aimm <= 0xfff);
606 aimm |= 1 << 12; /* apply LSL 12 */
607 }
608 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
609 }
610
611 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
612 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
613 that feed the DecodeBitMasks pseudo function. */
614 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
615 TCGReg rd, TCGReg rn, int n, int immr, int imms)
616 {
617 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
618 | rn << 5 | rd);
619 }
620
621 #define tcg_out_insn_3404 tcg_out_insn_3402
622
623 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
624 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
625 {
626 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
627 | rn << 5 | rd);
628 }
629
630 /* This function is used for the Move (wide immediate) instruction group.
631 Note that SHIFT is a full shift count, not the 2 bit HW field. */
632 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
633 TCGReg rd, uint16_t half, unsigned shift)
634 {
635 tcg_debug_assert((shift & ~0x30) == 0);
636 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
637 }
638
639 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
640 TCGReg rd, int64_t disp)
641 {
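/* The 21-bit displacement is split: DISP[1:0] goes to immlo (bits 30:29)
   and DISP[20:2] to immhi (bits 23:5). */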
642 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
643 }
644
645 /* This function is for 3.5.2 (Add/subtract shifted register), for the
646 rare occasion when we actually want to supply a shift amount. */
647 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
648 TCGType ext, TCGReg rd, TCGReg rn,
649 TCGReg rm, int imm6)
650 {
651 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
652 }
653
654 /* This function is for 3.5.2 (Add/subtract shifted register),
655 and 3.5.10 (Logical shifted register), for the vast majority of cases
656 when we don't want to apply a shift. Thus it can also be used for
657 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
658 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
659 TCGReg rd, TCGReg rn, TCGReg rm)
660 {
661 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
662 }
663
664 #define tcg_out_insn_3503 tcg_out_insn_3502
665 #define tcg_out_insn_3508 tcg_out_insn_3502
666 #define tcg_out_insn_3510 tcg_out_insn_3502
667
668 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
669 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
670 {
671 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
672 | tcg_cond_to_aarch64[c] << 12);
673 }
674
675 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
676 TCGReg rd, TCGReg rn)
677 {
678 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
679 }
680
681 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
682 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
683 {
684 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
685 }
686
687 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
688 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
689 {
690 /* Note that bit 11 set means general register input. Therefore
691 we can handle both register sets with one function. */
692 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
693 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
694 }
695
696 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
697 TCGReg rd, bool op, int cmode, uint8_t imm8)
698 {
699 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
700 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
701 }
702
703 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
704 TCGReg rd, TCGReg rn, unsigned immhb)
705 {
706 tcg_out32(s, insn | q << 30 | immhb << 16
707 | (rn & 0x1f) << 5 | (rd & 0x1f));
708 }
709
710 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
711 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
712 {
713 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
714 | (rn & 0x1f) << 5 | (rd & 0x1f));
715 }
716
717 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
718 unsigned size, TCGReg rd, TCGReg rn)
719 {
720 tcg_out32(s, insn | q << 30 | (size << 22)
721 | (rn & 0x1f) << 5 | (rd & 0x1f));
722 }
723
724 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
725 TCGReg rd, TCGReg base, TCGType ext,
726 TCGReg regoff)
727 {
728 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
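/* 0x4000 | ext << 13 sets the option field: UXTW when the offset register
   is 32-bit (ext = 0), LSL/UXTX when it is 64-bit (ext = 1). */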
729 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
730 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
731 }
732
733 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
734 TCGReg rd, TCGReg rn, intptr_t offset)
735 {
736 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
737 }
738
739 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
740 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
741 {
742 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
743 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
744 | rn << 5 | (rd & 0x1f));
745 }
746
747 /* Register to register move using ORR (shifted register with no shift). */
748 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
749 {
750 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
751 }
752
753 /* Register to register move using ADDI (move to/from SP). */
754 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
755 {
756 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
757 }
758
759 /* This function is used for the Logical (immediate) instruction group.
760 The value of LIMM must satisfy IS_LIMM. See the comment above about
761 only supporting simplified logical immediates. */
762 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
763 TCGReg rd, TCGReg rn, uint64_t limm)
764 {
765 unsigned h, l, r, c;
766
767 tcg_debug_assert(is_limm(limm));
768
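/* Compute the rotation R (the IMMR field) and the ones-count minus one C
   (the IMMS field) that DecodeBitMasks will expand back into LIMM. */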
769 h = clz64(limm);
770 l = ctz64(limm);
771 if (l == 0) {
772 r = 0; /* form 0....01....1 */
773 c = ctz64(~limm) - 1;
774 if (h == 0) {
775 r = clz64(~limm); /* form 1..10..01..1 */
776 c += r;
777 }
778 } else {
779 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
780 c = r - h - 1;
781 }
782 if (ext == TCG_TYPE_I32) {
783 r &= 31;
784 c &= 31;
785 }
786
787 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
788 }
789
790 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
791 TCGReg rd, uint64_t v64)
792 {
793 int op, cmode, imm8;
794
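/* The LDR (literal) forms used below share the 19-bit pc-relative field of
   the conditional branches, so pool entries reuse R_AARCH64_CONDBR19. */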
795 if (is_fimm(v64, &op, &cmode, &imm8)) {
796 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
797 } else if (type == TCG_TYPE_V128) {
798 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
799 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
800 } else {
801 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
802 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
803 }
804 }
805
806 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
807 tcg_target_long value)
808 {
809 tcg_target_long svalue = value;
810 tcg_target_long ivalue = ~value;
811 tcg_target_long t0, t1, t2;
812 int s0, s1;
813 AArch64Insn opc;
814
815 switch (type) {
816 case TCG_TYPE_I32:
817 case TCG_TYPE_I64:
818 tcg_debug_assert(rd < 32);
819 break;
820
821 case TCG_TYPE_V64:
822 case TCG_TYPE_V128:
823 tcg_debug_assert(rd >= 32);
824 tcg_out_dupi_vec(s, type, rd, value);
825 return;
826
827 default:
828 g_assert_not_reached();
829 }
830
831 /* For 32-bit values, discard potential garbage in value. For 64-bit
832 values within [2**31, 2**32-1], we can create smaller sequences by
833 interpreting this as a negative 32-bit number, while ensuring that
834 the high 32 bits are cleared by setting SF=0. */
835 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
836 svalue = (int32_t)value;
837 value = (uint32_t)value;
838 ivalue = (uint32_t)ivalue;
839 type = TCG_TYPE_I32;
840 }
841
842 /* Speed things up by handling the common case of small positive
843 and negative values specially. */
844 if ((value & ~0xffffull) == 0) {
845 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
846 return;
847 } else if ((ivalue & ~0xffffull) == 0) {
848 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
849 return;
850 }
851
852 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
853 use the sign-extended value. That lets us match rotated values such
854 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
855 if (is_limm(svalue)) {
856 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
857 return;
858 }
859
860 /* Look for host pointer values within 4G of the PC. This happens
861 often when loading pointers to QEMU's own data structures. */
862 if (type == TCG_TYPE_I64) {
863 tcg_target_long disp = value - (intptr_t)s->code_ptr;
864 if (disp == sextract64(disp, 0, 21)) {
865 tcg_out_insn(s, 3406, ADR, rd, disp);
866 return;
867 }
868 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
869 if (disp == sextract64(disp, 0, 21)) {
870 tcg_out_insn(s, 3406, ADRP, rd, disp);
871 if (value & 0xfff) {
872 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
873 }
874 return;
875 }
876 }
877
878 /* Would it take fewer insns to begin with MOVN? */
879 if (ctpop64(value) >= 32) {
880 t0 = ivalue;
881 opc = I3405_MOVN;
882 } else {
883 t0 = value;
884 opc = I3405_MOVZ;
885 }
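/* Locate the two lowest non-zero 16-bit chunks of the chosen value; if
   clearing both leaves zero, the constant fits in one MOVZ/MOVN plus at
   most one MOVK. */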
886 s0 = ctz64(t0) & (63 & -16);
887 t1 = t0 & ~(0xffffUL << s0);
888 s1 = ctz64(t1) & (63 & -16);
889 t2 = t1 & ~(0xffffUL << s1);
890 if (t2 == 0) {
891 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
892 if (t1 != 0) {
893 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
894 }
895 return;
896 }
897
898 /* For more than 2 insns, dump it into the constant pool. */
899 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
900 tcg_out_insn(s, 3305, LDR, 0, rd);
901 }
902
903 /* Define something more legible for general use. */
904 #define tcg_out_ldst_r tcg_out_insn_3310
905
906 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
907 TCGReg rn, intptr_t offset, int lgsize)
908 {
909 /* If the offset is naturally aligned and in range, then we can
910 use the scaled uimm12 encoding */
911 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
912 uintptr_t scaled_uimm = offset >> lgsize;
913 if (scaled_uimm <= 0xfff) {
914 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
915 return;
916 }
917 }
918
919 /* Small signed offsets can use the unscaled encoding. */
920 if (offset >= -256 && offset < 256) {
921 tcg_out_insn_3312(s, insn, rd, rn, offset);
922 return;
923 }
924
925 /* Worst-case scenario, move offset to temp register, use reg offset. */
926 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
927 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
928 }
929
930 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
931 {
932 if (ret == arg) {
933 return;
934 }
935 switch (type) {
936 case TCG_TYPE_I32:
937 case TCG_TYPE_I64:
938 if (ret < 32 && arg < 32) {
939 tcg_out_movr(s, type, ret, arg);
940 break;
941 } else if (ret < 32) {
942 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
943 break;
944 } else if (arg < 32) {
945 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
946 break;
947 }
948 /* FALLTHRU */
949
950 case TCG_TYPE_V64:
951 tcg_debug_assert(ret >= 32 && arg >= 32);
952 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
953 break;
954 case TCG_TYPE_V128:
955 tcg_debug_assert(ret >= 32 && arg >= 32);
956 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
957 break;
958
959 default:
960 g_assert_not_reached();
961 }
962 }
963
964 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
965 TCGReg base, intptr_t ofs)
966 {
967 AArch64Insn insn;
968 int lgsz;
969
970 switch (type) {
971 case TCG_TYPE_I32:
972 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
973 lgsz = 2;
974 break;
975 case TCG_TYPE_I64:
976 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
977 lgsz = 3;
978 break;
979 case TCG_TYPE_V64:
980 insn = I3312_LDRVD;
981 lgsz = 3;
982 break;
983 case TCG_TYPE_V128:
984 insn = I3312_LDRVQ;
985 lgsz = 4;
986 break;
987 default:
988 g_assert_not_reached();
989 }
990 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
991 }
992
993 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
994 TCGReg base, intptr_t ofs)
995 {
996 AArch64Insn insn;
997 int lgsz;
998
999 switch (type) {
1000 case TCG_TYPE_I32:
1001 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1002 lgsz = 2;
1003 break;
1004 case TCG_TYPE_I64:
1005 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1006 lgsz = 3;
1007 break;
1008 case TCG_TYPE_V64:
1009 insn = I3312_STRVD;
1010 lgsz = 3;
1011 break;
1012 case TCG_TYPE_V128:
1013 insn = I3312_STRVQ;
1014 lgsz = 4;
1015 break;
1016 default:
1017 g_assert_not_reached();
1018 }
1019 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1020 }
1021
1022 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1023 TCGReg base, intptr_t ofs)
1024 {
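/* Only zero can be stored without a scratch register, via XZR; decline
   anything else. */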
1025 if (type <= TCG_TYPE_I64 && val == 0) {
1026 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1027 return true;
1028 }
1029 return false;
1030 }
1031
1032 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1033 TCGReg rn, unsigned int a, unsigned int b)
1034 {
1035 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1036 }
1037
1038 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1039 TCGReg rn, unsigned int a, unsigned int b)
1040 {
1041 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1042 }
1043
1044 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1045 TCGReg rn, unsigned int a, unsigned int b)
1046 {
1047 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1048 }
1049
1050 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1051 TCGReg rn, TCGReg rm, unsigned int a)
1052 {
1053 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1054 }
1055
1056 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1057 TCGReg rd, TCGReg rn, unsigned int m)
1058 {
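/* LSL is implemented as the UBFM alias: IMMR = bits - m, IMMS = bits - 1 - m
   (with m taken modulo the operation width); the shift and rotate helpers
   below use the analogous UBFM/SBFM/EXTR aliases. */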
1059 int bits = ext ? 64 : 32;
1060 int max = bits - 1;
1061 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1062 }
1063
1064 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1065 TCGReg rd, TCGReg rn, unsigned int m)
1066 {
1067 int max = ext ? 63 : 31;
1068 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1069 }
1070
1071 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1072 TCGReg rd, TCGReg rn, unsigned int m)
1073 {
1074 int max = ext ? 63 : 31;
1075 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1076 }
1077
1078 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1079 TCGReg rd, TCGReg rn, unsigned int m)
1080 {
1081 int max = ext ? 63 : 31;
1082 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1083 }
1084
1085 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1086 TCGReg rd, TCGReg rn, unsigned int m)
1087 {
1088 int bits = ext ? 64 : 32;
1089 int max = bits - 1;
1090 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1091 }
1092
1093 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1094 TCGReg rn, unsigned lsb, unsigned width)
1095 {
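/* Deposit via the BFI alias of BFM: IMMR = (-lsb mod size), IMMS = width - 1. */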
1096 unsigned size = ext ? 64 : 32;
1097 unsigned a = (size - lsb) & (size - 1);
1098 unsigned b = width - 1;
1099 tcg_out_bfm(s, ext, rd, rn, a, b);
1100 }
1101
1102 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1103 tcg_target_long b, bool const_b)
1104 {
1105 if (const_b) {
1106 /* Using CMP or CMN aliases. */
1107 if (b >= 0) {
1108 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1109 } else {
1110 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1111 }
1112 } else {
1113 /* Using CMP alias SUBS wzr, Wn, Wm */
1114 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1115 }
1116 }
1117
1118 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1119 {
1120 ptrdiff_t offset = target - s->code_ptr;
1121 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1122 tcg_out_insn(s, 3206, B, offset);
1123 }
1124
1125 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1126 {
1127 ptrdiff_t offset = target - s->code_ptr;
1128 if (offset == sextract64(offset, 0, 26)) {
1129 tcg_out_insn(s, 3206, B, offset);
1130 } else {
1131 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1132 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1133 }
1134 }
1135
1136 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1137 {
1138 tcg_out_insn(s, 3207, BLR, reg);
1139 }
1140
1141 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1142 {
1143 ptrdiff_t offset = target - s->code_ptr;
1144 if (offset == sextract64(offset, 0, 26)) {
1145 tcg_out_insn(s, 3206, BL, offset);
1146 } else {
1147 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1148 tcg_out_callr(s, TCG_REG_TMP);
1149 }
1150 }
1151
1152 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1153 uintptr_t addr)
1154 {
1155 tcg_insn_unit i1, i2;
1156 TCGType rt = TCG_TYPE_I64;
1157 TCGReg rd = TCG_REG_TMP;
1158 uint64_t pair;
1159
1160 ptrdiff_t offset = addr - jmp_addr;
1161
1162 if (offset == sextract64(offset, 0, 26)) {
1163 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1164 i2 = NOP;
1165 } else {
1166 offset = (addr >> 12) - (jmp_addr >> 12);
1167
1168 /* patch ADRP */
1169 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1170 /* patch ADDI */
1171 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1172 }
1173 pair = (uint64_t)i2 << 32 | i1;
1174 atomic_set((uint64_t *)jmp_addr, pair);
1175 flush_icache_range(jmp_addr, jmp_addr + 8);
1176 }
1177
1178 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1179 {
1180 if (!l->has_value) {
1181 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1182 tcg_out_insn(s, 3206, B, 0);
1183 } else {
1184 tcg_out_goto(s, l->u.value_ptr);
1185 }
1186 }
1187
1188 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1189 TCGArg b, bool b_const, TCGLabel *l)
1190 {
1191 intptr_t offset;
1192 bool need_cmp;
1193
1194 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1195 need_cmp = false;
1196 } else {
1197 need_cmp = true;
1198 tcg_out_cmp(s, ext, a, b, b_const);
1199 }
1200
1201 if (!l->has_value) {
1202 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1203 offset = tcg_in32(s) >> 5;
1204 } else {
1205 offset = l->u.value_ptr - s->code_ptr;
1206 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1207 }
1208
1209 if (need_cmp) {
1210 tcg_out_insn(s, 3202, B_C, c, offset);
1211 } else if (c == TCG_COND_EQ) {
1212 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1213 } else {
1214 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1215 }
1216 }
1217
1218 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1219 {
1220 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1221 }
1222
1223 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1224 {
1225 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1226 }
1227
1228 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1229 {
1230 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1231 }
1232
1233 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1234 TCGReg rd, TCGReg rn)
1235 {
1236 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1237 int bits = (8 << s_bits) - 1;
1238 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1239 }
1240
1241 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1242 TCGReg rd, TCGReg rn)
1243 {
1244 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1245 int bits = (8 << s_bits) - 1;
1246 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1247 }
1248
1249 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1250 TCGReg rn, int64_t aimm)
1251 {
1252 if (aimm >= 0) {
1253 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1254 } else {
1255 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1256 }
1257 }
1258
1259 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1260 TCGReg rh, TCGReg al, TCGReg ah,
1261 tcg_target_long bl, tcg_target_long bh,
1262 bool const_bl, bool const_bh, bool sub)
1263 {
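/* Double-word add/subtract: the low halves use ADDS/SUBS (or the immediate
   forms) to set the carry, which the ADC/SBC on the high halves consumes.
   RL is redirected to TMP if it would clobber an input of the high half. */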
1264 TCGReg orig_rl = rl;
1265 AArch64Insn insn;
1266
1267 if (rl == ah || (!const_bh && rl == bh)) {
1268 rl = TCG_REG_TMP;
1269 }
1270
1271 if (const_bl) {
1272 insn = I3401_ADDSI;
1273 if ((bl < 0) ^ sub) {
1274 insn = I3401_SUBSI;
1275 bl = -bl;
1276 }
1277 if (unlikely(al == TCG_REG_XZR)) {
1278 /* ??? We want to allow al to be zero for the benefit of
1279 negation via subtraction. However, that leaves open the
1280 possibility of adding 0+const in the low part, and the
1281 immediate add instructions encode XSP not XZR. Don't try
1282 anything more elaborate here than loading another zero. */
1283 al = TCG_REG_TMP;
1284 tcg_out_movi(s, ext, al, 0);
1285 }
1286 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1287 } else {
1288 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1289 }
1290
1291 insn = I3503_ADC;
1292 if (const_bh) {
1293 /* Note that the only two constants we support are 0 and -1, and
1294 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1295 if ((bh != 0) ^ sub) {
1296 insn = I3503_SBC;
1297 }
1298 bh = TCG_REG_XZR;
1299 } else if (sub) {
1300 insn = I3503_SBC;
1301 }
1302 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1303
1304 tcg_out_mov(s, ext, orig_rl, rl);
1305 }
1306
1307 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1308 {
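/* Map the requested ordering onto a barrier: store-store needs only
   DMB ISHST, orderings satisfied by ordering prior loads use DMB ISHLD,
   and everything else falls back to a full DMB ISH. */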
1309 static const uint32_t sync[] = {
1310 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1311 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1312 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1313 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1314 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1315 };
1316 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1317 }
1318
1319 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1320 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1321 {
1322 TCGReg a1 = a0;
1323 if (is_ctz) {
1324 a1 = TCG_REG_TMP;
1325 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1326 }
1327 if (const_b && b == (ext ? 64 : 32)) {
1328 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1329 } else {
1330 AArch64Insn sel = I3506_CSEL;
1331
1332 tcg_out_cmp(s, ext, a0, 0, 1);
1333 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1334
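/* A0 was compared against zero above: select the CLZ result when A0 is
   non-zero, otherwise the fallback value B (CSINV of XZR covers the
   common B == -1 case without a separate movi). */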
1335 if (const_b) {
1336 if (b == -1) {
1337 b = TCG_REG_XZR;
1338 sel = I3506_CSINV;
1339 } else if (b == 0) {
1340 b = TCG_REG_XZR;
1341 } else {
1342 tcg_out_movi(s, ext, d, b);
1343 b = d;
1344 }
1345 }
1346 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1347 }
1348 }
1349
1350 #ifdef CONFIG_SOFTMMU
1351 #include "tcg-ldst.inc.c"
1352
1353 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1354 * TCGMemOpIdx oi, uintptr_t ra)
1355 */
1356 static void * const qemu_ld_helpers[16] = {
1357 [MO_UB] = helper_ret_ldub_mmu,
1358 [MO_LEUW] = helper_le_lduw_mmu,
1359 [MO_LEUL] = helper_le_ldul_mmu,
1360 [MO_LEQ] = helper_le_ldq_mmu,
1361 [MO_BEUW] = helper_be_lduw_mmu,
1362 [MO_BEUL] = helper_be_ldul_mmu,
1363 [MO_BEQ] = helper_be_ldq_mmu,
1364 };
1365
1366 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1367 * uintxx_t val, TCGMemOpIdx oi,
1368 * uintptr_t ra)
1369 */
1370 static void * const qemu_st_helpers[16] = {
1371 [MO_UB] = helper_ret_stb_mmu,
1372 [MO_LEUW] = helper_le_stw_mmu,
1373 [MO_LEUL] = helper_le_stl_mmu,
1374 [MO_LEQ] = helper_le_stq_mmu,
1375 [MO_BEUW] = helper_be_stw_mmu,
1376 [MO_BEUL] = helper_be_stl_mmu,
1377 [MO_BEQ] = helper_be_stq_mmu,
1378 };
1379
1380 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1381 {
1382 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1383 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1384 tcg_out_insn(s, 3406, ADR, rd, offset);
1385 }
1386
1387 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1388 {
1389 TCGMemOpIdx oi = lb->oi;
1390 TCGMemOp opc = get_memop(oi);
1391 TCGMemOp size = opc & MO_SIZE;
1392
1393 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1394 tcg_debug_assert(ok);
1395
1396 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1397 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1398 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1399 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1400 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1401 if (opc & MO_SIGN) {
1402 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1403 } else {
1404 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1405 }
1406
1407 tcg_out_goto(s, lb->raddr);
1408 }
1409
1410 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1411 {
1412 TCGMemOpIdx oi = lb->oi;
1413 TCGMemOp opc = get_memop(oi);
1414 TCGMemOp size = opc & MO_SIZE;
1415
1416 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1417 tcg_debug_assert(ok);
1418
1419 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1420 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1421 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1422 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1423 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1424 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1425 tcg_out_goto(s, lb->raddr);
1426 }
1427
1428 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1429 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1430 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1431 {
1432 TCGLabelQemuLdst *label = new_ldst_label(s);
1433
1434 label->is_ld = is_ld;
1435 label->oi = oi;
1436 label->type = ext;
1437 label->datalo_reg = data_reg;
1438 label->addrlo_reg = addr_reg;
1439 label->raddr = raddr;
1440 label->label_ptr[0] = label_ptr;
1441 }
1442
1443 /* Load and compare a TLB entry, emitting the conditional jump to the
1444 slow path for the failure case, which will be patched later when finalizing
1445 the slow path. Generated code returns the host addend in X1,
1446 clobbers X0,X2,X3,TMP. */
1447 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1448 tcg_insn_unit **label_ptr, int mem_index,
1449 bool is_read)
1450 {
1451 int tlb_offset = is_read ?
1452 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1453 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1454 unsigned a_bits = get_alignment_bits(opc);
1455 unsigned s_bits = opc & MO_SIZE;
1456 unsigned a_mask = (1u << a_bits) - 1;
1457 unsigned s_mask = (1u << s_bits) - 1;
1458 TCGReg base = TCG_AREG0, x3;
1459 uint64_t tlb_mask;
1460
1461 /* For aligned accesses, we check the first byte and include the alignment
1462 bits within the address. For unaligned access, we check that we don't
1463 cross pages using the address of the last byte of the access. */
1464 if (a_bits >= s_bits) {
1465 x3 = addr_reg;
1466 } else {
1467 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1468 TCG_REG_X3, addr_reg, s_mask - a_mask);
1469 x3 = TCG_REG_X3;
1470 }
1471 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1472
1473 /* Extract the TLB index from the address into X0.
1474 X0<CPU_TLB_BITS:0> =
1475 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1476 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1477 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1478
1479 /* Store the page mask part of the address into X3. */
1480 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1481 TCG_REG_X3, x3, tlb_mask);
1482
1483 /* Add any "high bits" from the tlb offset to the env address into X2,
1484 to take advantage of the LSL12 form of the ADDI instruction.
1485 X2 = env + (tlb_offset & 0xfff000) */
1486 if (tlb_offset & 0xfff000) {
1487 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1488 tlb_offset & 0xfff000);
1489 base = TCG_REG_X2;
1490 }
1491
1492 /* Merge the tlb index contribution into X2.
1493 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1494 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1495 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1496
1497 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1498 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1499 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1500 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
1501 TARGET_LONG_BITS == 32 ? 2 : 3);
1502
1503 /* Load the tlb addend. Do that early to avoid stalling.
1504 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1505 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1506 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1507 (is_read ? offsetof(CPUTLBEntry, addr_read)
1508 : offsetof(CPUTLBEntry, addr_write)), 3);
1509
1510 /* Perform the address comparison. */
1511 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1512
1513 /* If not equal, we jump to the slow path. */
1514 *label_ptr = s->code_ptr;
1515 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1516 }
1517
1518 #endif /* CONFIG_SOFTMMU */
1519
1520 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1521 TCGReg data_r, TCGReg addr_r,
1522 TCGType otype, TCGReg off_r)
1523 {
1524 const TCGMemOp bswap = memop & MO_BSWAP;
1525
1526 switch (memop & MO_SSIZE) {
1527 case MO_UB:
1528 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1529 break;
1530 case MO_SB:
1531 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1532 data_r, addr_r, otype, off_r);
1533 break;
1534 case MO_UW:
1535 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1536 if (bswap) {
1537 tcg_out_rev16(s, data_r, data_r);
1538 }
1539 break;
1540 case MO_SW:
1541 if (bswap) {
1542 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1543 tcg_out_rev16(s, data_r, data_r);
1544 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1545 } else {
1546 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1547 data_r, addr_r, otype, off_r);
1548 }
1549 break;
1550 case MO_UL:
1551 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1552 if (bswap) {
1553 tcg_out_rev32(s, data_r, data_r);
1554 }
1555 break;
1556 case MO_SL:
1557 if (bswap) {
1558 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1559 tcg_out_rev32(s, data_r, data_r);
1560 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1561 } else {
1562 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1563 }
1564 break;
1565 case MO_Q:
1566 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1567 if (bswap) {
1568 tcg_out_rev64(s, data_r, data_r);
1569 }
1570 break;
1571 default:
1572 tcg_abort();
1573 }
1574 }
1575
1576 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1577 TCGReg data_r, TCGReg addr_r,
1578 TCGType otype, TCGReg off_r)
1579 {
1580 const TCGMemOp bswap = memop & MO_BSWAP;
1581
1582 switch (memop & MO_SIZE) {
1583 case MO_8:
1584 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1585 break;
1586 case MO_16:
1587 if (bswap && data_r != TCG_REG_XZR) {
1588 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1589 data_r = TCG_REG_TMP;
1590 }
1591 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1592 break;
1593 case MO_32:
1594 if (bswap && data_r != TCG_REG_XZR) {
1595 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1596 data_r = TCG_REG_TMP;
1597 }
1598 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1599 break;
1600 case MO_64:
1601 if (bswap && data_r != TCG_REG_XZR) {
1602 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1603 data_r = TCG_REG_TMP;
1604 }
1605 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1606 break;
1607 default:
1608 tcg_abort();
1609 }
1610 }
1611
1612 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1613 TCGMemOpIdx oi, TCGType ext)
1614 {
1615 TCGMemOp memop = get_memop(oi);
1616 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1617 #ifdef CONFIG_SOFTMMU
1618 unsigned mem_index = get_mmuidx(oi);
1619 tcg_insn_unit *label_ptr;
1620
1621 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1622 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1623 TCG_REG_X1, otype, addr_reg);
1624 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1625 s->code_ptr, label_ptr);
1626 #else /* !CONFIG_SOFTMMU */
1627 if (USE_GUEST_BASE) {
1628 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1629 TCG_REG_GUEST_BASE, otype, addr_reg);
1630 } else {
1631 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1632 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1633 }
1634 #endif /* CONFIG_SOFTMMU */
1635 }
1636
1637 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1638 TCGMemOpIdx oi)
1639 {
1640 TCGMemOp memop = get_memop(oi);
1641 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1642 #ifdef CONFIG_SOFTMMU
1643 unsigned mem_index = get_mmuidx(oi);
1644 tcg_insn_unit *label_ptr;
1645
1646 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1647 tcg_out_qemu_st_direct(s, memop, data_reg,
1648 TCG_REG_X1, otype, addr_reg);
1649 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1650 data_reg, addr_reg, s->code_ptr, label_ptr);
1651 #else /* !CONFIG_SOFTMMU */
1652 if (USE_GUEST_BASE) {
1653 tcg_out_qemu_st_direct(s, memop, data_reg,
1654 TCG_REG_GUEST_BASE, otype, addr_reg);
1655 } else {
1656 tcg_out_qemu_st_direct(s, memop, data_reg,
1657 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1658 }
1659 #endif /* CONFIG_SOFTMMU */
1660 }
1661
1662 static tcg_insn_unit *tb_ret_addr;
1663
1664 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1665 const TCGArg args[TCG_MAX_OP_ARGS],
1666 const int const_args[TCG_MAX_OP_ARGS])
1667 {
1668 /* 99% of the time, we can signal the use of extension registers
1669 by looking to see if the opcode handles 64-bit data. */
1670 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1671
1672 /* Hoist the loads of the most common arguments. */
1673 TCGArg a0 = args[0];
1674 TCGArg a1 = args[1];
1675 TCGArg a2 = args[2];
1676 int c2 = const_args[2];
1677
1678 /* Some operands are defined with "rZ" constraint, a register or
1679 the zero register. These need not actually test args[I] == 0. */
1680 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1681
1682 switch (opc) {
1683 case INDEX_op_exit_tb:
1684 /* Reuse the zeroing that exists for goto_ptr. */
1685 if (a0 == 0) {
1686 tcg_out_goto_long(s, s->code_gen_epilogue);
1687 } else {
1688 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1689 tcg_out_goto_long(s, tb_ret_addr);
1690 }
1691 break;
1692
1693 case INDEX_op_goto_tb:
1694 if (s->tb_jmp_insn_offset != NULL) {
1695 /* TCG_TARGET_HAS_direct_jump */
1696 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1697 write can be used to patch the target address. */
1698 if ((uintptr_t)s->code_ptr & 7) {
1699 tcg_out32(s, NOP);
1700 }
1701 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1702 /* actual branch destination will be patched by
1703 tb_target_set_jmp_target later. */
1704 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1705 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1706 } else {
1707 /* !TCG_TARGET_HAS_direct_jump */
1708 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1709 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1710 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1711 }
1712 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1713 set_jmp_reset_offset(s, a0);
1714 break;
1715
1716 case INDEX_op_goto_ptr:
1717 tcg_out_insn(s, 3207, BR, a0);
1718 break;
1719
1720 case INDEX_op_br:
1721 tcg_out_goto_label(s, arg_label(a0));
1722 break;
1723
1724 case INDEX_op_ld8u_i32:
1725 case INDEX_op_ld8u_i64:
1726 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1727 break;
1728 case INDEX_op_ld8s_i32:
1729 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1730 break;
1731 case INDEX_op_ld8s_i64:
1732 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1733 break;
1734 case INDEX_op_ld16u_i32:
1735 case INDEX_op_ld16u_i64:
1736 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1737 break;
1738 case INDEX_op_ld16s_i32:
1739 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1740 break;
1741 case INDEX_op_ld16s_i64:
1742 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1743 break;
1744 case INDEX_op_ld_i32:
1745 case INDEX_op_ld32u_i64:
1746 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1747 break;
1748 case INDEX_op_ld32s_i64:
1749 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1750 break;
1751 case INDEX_op_ld_i64:
1752 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1753 break;
1754
1755 case INDEX_op_st8_i32:
1756 case INDEX_op_st8_i64:
1757 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1758 break;
1759 case INDEX_op_st16_i32:
1760 case INDEX_op_st16_i64:
1761 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1762 break;
1763 case INDEX_op_st_i32:
1764 case INDEX_op_st32_i64:
1765 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1766 break;
1767 case INDEX_op_st_i64:
1768 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1769 break;
1770
1771 case INDEX_op_add_i32:
1772 a2 = (int32_t)a2;
1773 /* FALLTHRU */
1774 case INDEX_op_add_i64:
1775 if (c2) {
1776 tcg_out_addsubi(s, ext, a0, a1, a2);
1777 } else {
1778 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1779 }
1780 break;
1781
1782 case INDEX_op_sub_i32:
1783 a2 = (int32_t)a2;
1784 /* FALLTHRU */
1785 case INDEX_op_sub_i64:
1786 if (c2) {
1787 tcg_out_addsubi(s, ext, a0, a1, -a2);
1788 } else {
1789 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1790 }
1791 break;
1792
1793 case INDEX_op_neg_i64:
1794 case INDEX_op_neg_i32:
1795 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1796 break;
1797
1798 case INDEX_op_and_i32:
1799 a2 = (int32_t)a2;
1800 /* FALLTHRU */
1801 case INDEX_op_and_i64:
1802 if (c2) {
1803 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1804 } else {
1805 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1806 }
1807 break;
1808
1809 case INDEX_op_andc_i32:
1810 a2 = (int32_t)a2;
1811 /* FALLTHRU */
1812 case INDEX_op_andc_i64:
1813 if (c2) {
1814 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1815 } else {
1816 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1817 }
1818 break;
1819
1820 case INDEX_op_or_i32:
1821 a2 = (int32_t)a2;
1822 /* FALLTHRU */
1823 case INDEX_op_or_i64:
1824 if (c2) {
1825 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1826 } else {
1827 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1828 }
1829 break;
1830
1831 case INDEX_op_orc_i32:
1832 a2 = (int32_t)a2;
1833 /* FALLTHRU */
1834 case INDEX_op_orc_i64:
1835 if (c2) {
1836 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1837 } else {
1838 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1839 }
1840 break;
1841
1842 case INDEX_op_xor_i32:
1843 a2 = (int32_t)a2;
1844 /* FALLTHRU */
1845 case INDEX_op_xor_i64:
1846 if (c2) {
1847 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1848 } else {
1849 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1850 }
1851 break;
1852
1853 case INDEX_op_eqv_i32:
1854 a2 = (int32_t)a2;
1855 /* FALLTHRU */
1856 case INDEX_op_eqv_i64:
1857 if (c2) {
1858 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1859 } else {
1860 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1861 }
1862 break;
1863
1864 case INDEX_op_not_i64:
1865 case INDEX_op_not_i32:
1866 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1867 break;
1868
1869 case INDEX_op_mul_i64:
1870 case INDEX_op_mul_i32:
1871 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1872 break;
1873
1874 case INDEX_op_div_i64:
1875 case INDEX_op_div_i32:
1876 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1877 break;
1878 case INDEX_op_divu_i64:
1879 case INDEX_op_divu_i32:
1880 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1881 break;
1882
1883 case INDEX_op_rem_i64:
1884 case INDEX_op_rem_i32:
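/* There is no integer remainder instruction: compute
   a0 = a1 - (a1 / a2) * a2 with a divide into TMP followed by MSUB.
   remu below does the same with UDIV. */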
1885 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1886 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1887 break;
1888 case INDEX_op_remu_i64:
1889 case INDEX_op_remu_i32:
1890 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1891 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1892 break;
1893
1894 case INDEX_op_shl_i64:
1895 case INDEX_op_shl_i32:
1896 if (c2) {
1897 tcg_out_shl(s, ext, a0, a1, a2);
1898 } else {
1899 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1900 }
1901 break;
1902
1903 case INDEX_op_shr_i64:
1904 case INDEX_op_shr_i32:
1905 if (c2) {
1906 tcg_out_shr(s, ext, a0, a1, a2);
1907 } else {
1908 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1909 }
1910 break;
1911
1912 case INDEX_op_sar_i64:
1913 case INDEX_op_sar_i32:
1914 if (c2) {
1915 tcg_out_sar(s, ext, a0, a1, a2);
1916 } else {
1917 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1918 }
1919 break;
1920
1921 case INDEX_op_rotr_i64:
1922 case INDEX_op_rotr_i32:
1923 if (c2) {
1924 tcg_out_rotr(s, ext, a0, a1, a2);
1925 } else {
1926 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1927 }
1928 break;
1929
1930 case INDEX_op_rotl_i64:
1931 case INDEX_op_rotl_i32:
1932 if (c2) {
1933 tcg_out_rotl(s, ext, a0, a1, a2);
1934 } else {
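/* There is no rotate-left instruction: negate the count (a 32-bit
   SUB from XZR suffices, since RORV only uses the low bits) and
   rotate right instead. */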
1935 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1936 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1937 }
1938 break;
1939
1940 case INDEX_op_clz_i64:
1941 case INDEX_op_clz_i32:
1942 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1943 break;
1944 case INDEX_op_ctz_i64:
1945 case INDEX_op_ctz_i32:
1946 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1947 break;
1948
1949 case INDEX_op_brcond_i32:
1950 a1 = (int32_t)a1;
1951 /* FALLTHRU */
1952 case INDEX_op_brcond_i64:
1953 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1954 break;
1955
1956 case INDEX_op_setcond_i32:
1957 a2 = (int32_t)a2;
1958 /* FALLTHRU */
1959 case INDEX_op_setcond_i64:
1960 tcg_out_cmp(s, ext, a1, a2, c2);
1961 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1962 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1963 TCG_REG_XZR, tcg_invert_cond(args[3]));
1964 break;
1965
1966 case INDEX_op_movcond_i32:
1967 a2 = (int32_t)a2;
1968 /* FALLTHRU */
1969 case INDEX_op_movcond_i64:
1970 tcg_out_cmp(s, ext, a1, a2, c2);
1971 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1972 break;
1973
1974 case INDEX_op_qemu_ld_i32:
1975 case INDEX_op_qemu_ld_i64:
1976 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1977 break;
1978 case INDEX_op_qemu_st_i32:
1979 case INDEX_op_qemu_st_i64:
1980 tcg_out_qemu_st(s, REG0(0), a1, a2);
1981 break;
1982
1983 case INDEX_op_bswap64_i64:
1984 tcg_out_rev64(s, a0, a1);
1985 break;
1986 case INDEX_op_bswap32_i64:
1987 case INDEX_op_bswap32_i32:
1988 tcg_out_rev32(s, a0, a1);
1989 break;
1990 case INDEX_op_bswap16_i64:
1991 case INDEX_op_bswap16_i32:
1992 tcg_out_rev16(s, a0, a1);
1993 break;
1994
1995 case INDEX_op_ext8s_i64:
1996 case INDEX_op_ext8s_i32:
1997 tcg_out_sxt(s, ext, MO_8, a0, a1);
1998 break;
1999 case INDEX_op_ext16s_i64:
2000 case INDEX_op_ext16s_i32:
2001 tcg_out_sxt(s, ext, MO_16, a0, a1);
2002 break;
2003 case INDEX_op_ext_i32_i64:
2004 case INDEX_op_ext32s_i64:
2005 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2006 break;
2007 case INDEX_op_ext8u_i64:
2008 case INDEX_op_ext8u_i32:
2009 tcg_out_uxt(s, MO_8, a0, a1);
2010 break;
2011 case INDEX_op_ext16u_i64:
2012 case INDEX_op_ext16u_i32:
2013 tcg_out_uxt(s, MO_16, a0, a1);
2014 break;
2015 case INDEX_op_extu_i32_i64:
2016 case INDEX_op_ext32u_i64:
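/* A 32-bit register move (ORR Wd, WZR, Wn) zero-extends into the
   full 64-bit register. */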
2017 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2018 break;
2019
2020 case INDEX_op_deposit_i64:
2021 case INDEX_op_deposit_i32:
2022 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2023 break;
2024
2025 case INDEX_op_extract_i64:
2026 case INDEX_op_extract_i32:
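/* UBFX alias of UBFM: extract args[3] bits starting at bit a2.
   sextract below uses the SBFX alias of SBFM in the same way. */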
2027 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2028 break;
2029
2030 case INDEX_op_sextract_i64:
2031 case INDEX_op_sextract_i32:
2032 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2033 break;
2034
2035 case INDEX_op_add2_i32:
2036 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2037 (int32_t)args[4], args[5], const_args[4],
2038 const_args[5], false);
2039 break;
2040 case INDEX_op_add2_i64:
2041 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2042 args[5], const_args[4], const_args[5], false);
2043 break;
2044 case INDEX_op_sub2_i32:
2045 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2046 (int32_t)args[4], args[5], const_args[4],
2047 const_args[5], true);
2048 break;
2049 case INDEX_op_sub2_i64:
2050 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2051 args[5], const_args[4], const_args[5], true);
2052 break;
2053
2054 case INDEX_op_muluh_i64:
2055 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2056 break;
2057 case INDEX_op_mulsh_i64:
2058 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2059 break;
2060
2061 case INDEX_op_mb:
2062 tcg_out_mb(s, a0);
2063 break;
2064
2065 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2066 case INDEX_op_mov_i64:
2067 case INDEX_op_mov_vec:
2068 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2069 case INDEX_op_movi_i64:
2070 case INDEX_op_dupi_vec:
2071 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2072 default:
2073 g_assert_not_reached();
2074 }
2075
2076 #undef REG0
2077 }
2078
2079 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2080 unsigned vecl, unsigned vece,
2081 const TCGArg *args, const int *const_args)
2082 {
2083 static const AArch64Insn cmp_insn[16] = {
2084 [TCG_COND_EQ] = I3616_CMEQ,
2085 [TCG_COND_GT] = I3616_CMGT,
2086 [TCG_COND_GE] = I3616_CMGE,
2087 [TCG_COND_GTU] = I3616_CMHI,
2088 [TCG_COND_GEU] = I3616_CMHS,
2089 };
2090 static const AArch64Insn cmp0_insn[16] = {
2091 [TCG_COND_EQ] = I3617_CMEQ0,
2092 [TCG_COND_GT] = I3617_CMGT0,
2093 [TCG_COND_GE] = I3617_CMGE0,
2094 [TCG_COND_LT] = I3617_CMLT0,
2095 [TCG_COND_LE] = I3617_CMLE0,
2096 };
2097
2098 TCGType type = vecl + TCG_TYPE_V64;
2099 unsigned is_q = vecl;
2100 TCGArg a0, a1, a2;
2101
2102 a0 = args[0];
2103 a1 = args[1];
2104 a2 = args[2];
2105
2106 switch (opc) {
2107 case INDEX_op_ld_vec:
2108 tcg_out_ld(s, type, a0, a1, a2);
2109 break;
2110 case INDEX_op_st_vec:
2111 tcg_out_st(s, type, a0, a1, a2);
2112 break;
2113 case INDEX_op_add_vec:
2114 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2115 break;
2116 case INDEX_op_sub_vec:
2117 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2118 break;
2119 case INDEX_op_mul_vec:
2120 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2121 break;
2122 case INDEX_op_neg_vec:
2123 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2124 break;
2125 case INDEX_op_and_vec:
2126 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2127 break;
2128 case INDEX_op_or_vec:
2129 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2130 break;
2131 case INDEX_op_xor_vec:
2132 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2133 break;
2134 case INDEX_op_andc_vec:
2135 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2136 break;
2137 case INDEX_op_orc_vec:
2138 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2139 break;
2140 case INDEX_op_not_vec:
2141 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2142 break;
2143 case INDEX_op_dup_vec:
2144 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2145 break;
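/* Advanced SIMD shift-by-immediate insns encode the element size and
   the shift count together in immh:immb: SHL uses esize + shift,
   while USHR/SSHR use 2 * esize - shift (8 << vece is the element
   size in bits). */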
2146 case INDEX_op_shli_vec:
2147 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2148 break;
2149 case INDEX_op_shri_vec:
2150 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2151 break;
2152 case INDEX_op_sari_vec:
2153 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2154 break;
2155 case INDEX_op_cmp_vec:
2156 {
2157 TCGCond cond = args[3];
2158 AArch64Insn insn;
2159
2160 if (cond == TCG_COND_NE) {
2161 if (const_args[2]) {
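/* a2 is necessarily the constant 0 here ("wZ" constraint).
   CMTST Vd, Vn, Vn sets an element to all ones iff it is
   non-zero, which is exactly a1 != 0. */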
2162 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2163 } else {
2164 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2165 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2166 }
2167 } else {
2168 if (const_args[2]) {
2169 insn = cmp0_insn[cond];
2170 if (insn) {
2171 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2172 break;
2173 }
2174 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2175 a2 = TCG_VEC_TMP;
2176 }
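/* cmp_insn[] only covers EQ/GT/GE/GTU/GEU; for the remaining
   conditions swap the operands and the condition. */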
2177 insn = cmp_insn[cond];
2178 if (insn == 0) {
2179 TCGArg t;
2180 t = a1, a1 = a2, a2 = t;
2181 cond = tcg_swap_cond(cond);
2182 insn = cmp_insn[cond];
2183 tcg_debug_assert(insn != 0);
2184 }
2185 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2186 }
2187 }
2188 break;
2189 default:
2190 g_assert_not_reached();
2191 }
2192 }
2193
2194 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2195 {
2196 switch (opc) {
2197 case INDEX_op_add_vec:
2198 case INDEX_op_sub_vec:
2199 case INDEX_op_and_vec:
2200 case INDEX_op_or_vec:
2201 case INDEX_op_xor_vec:
2202 case INDEX_op_andc_vec:
2203 case INDEX_op_orc_vec:
2204 case INDEX_op_neg_vec:
2205 case INDEX_op_not_vec:
2206 case INDEX_op_cmp_vec:
2207 case INDEX_op_shli_vec:
2208 case INDEX_op_shri_vec:
2209 case INDEX_op_sari_vec:
2210 return 1;
2211 case INDEX_op_mul_vec:
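/* The Advanced SIMD MUL instruction has no 64-bit element size. */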
2212 return vece < MO_64;
2213
2214 default:
2215 return 0;
2216 }
2217 }
2218
2219 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2220 TCGArg a0, ...)
2221 {
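/* Nothing to do: tcg_can_emit_vec_op above never returns -1, so no
   vector op is implemented by expansion on this backend. */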
2222 }
2223
2224 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2225 {
2226 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2227 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2228 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2229 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2230 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2231 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2232 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2233 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2234 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2235 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2236 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2237 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2238 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2239 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2240 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2241 static const TCGTargetOpDef r_r_rAL
2242 = { .args_ct_str = { "r", "r", "rAL" } };
2243 static const TCGTargetOpDef dep
2244 = { .args_ct_str = { "r", "0", "rZ" } };
2245 static const TCGTargetOpDef movc
2246 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2247 static const TCGTargetOpDef add2
2248 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
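/* Besides the usual 'r' and 'w' register classes: 'A' accepts a valid
   arithmetic (ADD/SUB) immediate, 'L' a valid logical (AND/ORR/EOR)
   immediate, 'Z' the constant zero (mapped to XZR/WZR via REG0), and
   'l' the registers usable as a qemu_ld/st address, which under
   softmmu excludes those clobbered by the TLB lookup. */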
2249
2250 switch (op) {
2251 case INDEX_op_goto_ptr:
2252 return &r;
2253
2254 case INDEX_op_ld8u_i32:
2255 case INDEX_op_ld8s_i32:
2256 case INDEX_op_ld16u_i32:
2257 case INDEX_op_ld16s_i32:
2258 case INDEX_op_ld_i32:
2259 case INDEX_op_ld8u_i64:
2260 case INDEX_op_ld8s_i64:
2261 case INDEX_op_ld16u_i64:
2262 case INDEX_op_ld16s_i64:
2263 case INDEX_op_ld32u_i64:
2264 case INDEX_op_ld32s_i64:
2265 case INDEX_op_ld_i64:
2266 case INDEX_op_neg_i32:
2267 case INDEX_op_neg_i64:
2268 case INDEX_op_not_i32:
2269 case INDEX_op_not_i64:
2270 case INDEX_op_bswap16_i32:
2271 case INDEX_op_bswap32_i32:
2272 case INDEX_op_bswap16_i64:
2273 case INDEX_op_bswap32_i64:
2274 case INDEX_op_bswap64_i64:
2275 case INDEX_op_ext8s_i32:
2276 case INDEX_op_ext16s_i32:
2277 case INDEX_op_ext8u_i32:
2278 case INDEX_op_ext16u_i32:
2279 case INDEX_op_ext8s_i64:
2280 case INDEX_op_ext16s_i64:
2281 case INDEX_op_ext32s_i64:
2282 case INDEX_op_ext8u_i64:
2283 case INDEX_op_ext16u_i64:
2284 case INDEX_op_ext32u_i64:
2285 case INDEX_op_ext_i32_i64:
2286 case INDEX_op_extu_i32_i64:
2287 case INDEX_op_extract_i32:
2288 case INDEX_op_extract_i64:
2289 case INDEX_op_sextract_i32:
2290 case INDEX_op_sextract_i64:
2291 return &r_r;
2292
2293 case INDEX_op_st8_i32:
2294 case INDEX_op_st16_i32:
2295 case INDEX_op_st_i32:
2296 case INDEX_op_st8_i64:
2297 case INDEX_op_st16_i64:
2298 case INDEX_op_st32_i64:
2299 case INDEX_op_st_i64:
2300 return &rZ_r;
2301
2302 case INDEX_op_add_i32:
2303 case INDEX_op_add_i64:
2304 case INDEX_op_sub_i32:
2305 case INDEX_op_sub_i64:
2306 case INDEX_op_setcond_i32:
2307 case INDEX_op_setcond_i64:
2308 return &r_r_rA;
2309
2310 case INDEX_op_mul_i32:
2311 case INDEX_op_mul_i64:
2312 case INDEX_op_div_i32:
2313 case INDEX_op_div_i64:
2314 case INDEX_op_divu_i32:
2315 case INDEX_op_divu_i64:
2316 case INDEX_op_rem_i32:
2317 case INDEX_op_rem_i64:
2318 case INDEX_op_remu_i32:
2319 case INDEX_op_remu_i64:
2320 case INDEX_op_muluh_i64:
2321 case INDEX_op_mulsh_i64:
2322 return &r_r_r;
2323
2324 case INDEX_op_and_i32:
2325 case INDEX_op_and_i64:
2326 case INDEX_op_or_i32:
2327 case INDEX_op_or_i64:
2328 case INDEX_op_xor_i32:
2329 case INDEX_op_xor_i64:
2330 case INDEX_op_andc_i32:
2331 case INDEX_op_andc_i64:
2332 case INDEX_op_orc_i32:
2333 case INDEX_op_orc_i64:
2334 case INDEX_op_eqv_i32:
2335 case INDEX_op_eqv_i64:
2336 return &r_r_rL;
2337
2338 case INDEX_op_shl_i32:
2339 case INDEX_op_shr_i32:
2340 case INDEX_op_sar_i32:
2341 case INDEX_op_rotl_i32:
2342 case INDEX_op_rotr_i32:
2343 case INDEX_op_shl_i64:
2344 case INDEX_op_shr_i64:
2345 case INDEX_op_sar_i64:
2346 case INDEX_op_rotl_i64:
2347 case INDEX_op_rotr_i64:
2348 return &r_r_ri;
2349
2350 case INDEX_op_clz_i32:
2351 case INDEX_op_ctz_i32:
2352 case INDEX_op_clz_i64:
2353 case INDEX_op_ctz_i64:
2354 return &r_r_rAL;
2355
2356 case INDEX_op_brcond_i32:
2357 case INDEX_op_brcond_i64:
2358 return &r_rA;
2359
2360 case INDEX_op_movcond_i32:
2361 case INDEX_op_movcond_i64:
2362 return &movc;
2363
2364 case INDEX_op_qemu_ld_i32:
2365 case INDEX_op_qemu_ld_i64:
2366 return &r_l;
2367 case INDEX_op_qemu_st_i32:
2368 case INDEX_op_qemu_st_i64:
2369 return &lZ_l;
2370
2371 case INDEX_op_deposit_i32:
2372 case INDEX_op_deposit_i64:
2373 return &dep;
2374
2375 case INDEX_op_add2_i32:
2376 case INDEX_op_add2_i64:
2377 case INDEX_op_sub2_i32:
2378 case INDEX_op_sub2_i64:
2379 return &add2;
2380
2381 case INDEX_op_add_vec:
2382 case INDEX_op_sub_vec:
2383 case INDEX_op_mul_vec:
2384 case INDEX_op_and_vec:
2385 case INDEX_op_or_vec:
2386 case INDEX_op_xor_vec:
2387 case INDEX_op_andc_vec:
2388 case INDEX_op_orc_vec:
2389 return &w_w_w;
2390 case INDEX_op_not_vec:
2391 case INDEX_op_neg_vec:
2392 case INDEX_op_shli_vec:
2393 case INDEX_op_shri_vec:
2394 case INDEX_op_sari_vec:
2395 return &w_w;
2396 case INDEX_op_ld_vec:
2397 case INDEX_op_st_vec:
2398 return &w_r;
2399 case INDEX_op_dup_vec:
2400 return &w_wr;
2401 case INDEX_op_cmp_vec:
2402 return &w_w_wZ;
2403
2404 default:
2405 return NULL;
2406 }
2407 }
2408
2409 static void tcg_target_init(TCGContext *s)
2410 {
2411 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2412 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2413 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2414 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2415
2416 tcg_target_call_clobber_regs = -1ull;
2417 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2418 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2419 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2420 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2421 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2422 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2423 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2424 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2425 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2426 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2427 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2428 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2429 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2430 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2431 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2432 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2433 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2434 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2435 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2436
2437 s->reserved_regs = 0;
2438 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2439 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2440 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2441 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2442 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2443 }
2444
2445 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2446 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2447
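/* The frame holds the saved register pairs, the outgoing call-argument
   area and the TCG temp buffer, rounded up to the stack alignment. */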
2448 #define FRAME_SIZE \
2449 ((PUSH_SIZE \
2450 + TCG_STATIC_CALL_ARGS_SIZE \
2451 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2452 + TCG_TARGET_STACK_ALIGN - 1) \
2453 & ~(TCG_TARGET_STACK_ALIGN - 1))
2454
2455 /* We're expecting a 2-byte uleb128-encoded value. */
2456 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2457
2458 /* We're expecting to use a single ADDI insn. */
2459 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2460
2461 static void tcg_target_qemu_prologue(TCGContext *s)
2462 {
2463 TCGReg r;
2464
2465 /* Push (FP, LR) and allocate space for all saved registers. */
2466 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2467 TCG_REG_SP, -PUSH_SIZE, 1, 1);
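/* That is "stp fp, lr, [sp, #-PUSH_SIZE]!": the pre-indexed writeback
   adjusts SP and stores the pair in a single insn. */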
2468
2469 /* Set up frame pointer for canonical unwinding. */
2470 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2471
2472 /* Store callee-preserved regs x19..x28. */
2473 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
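/* The (FP, LR) pair occupies offset 0, so x19/x20 start at offset 16. */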
2474 int ofs = (r - TCG_REG_X19 + 2) * 8;
2475 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2476 }
2477
2478 /* Make stack space for TCG locals. */
2479 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2480 FRAME_SIZE - PUSH_SIZE);
2481
2482 /* Tell TCG how to find the TCG locals: base register, offset and size. */
2483 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2484 CPU_TEMP_BUF_NLONGS * sizeof(long));
2485
2486 #if !defined(CONFIG_SOFTMMU)
2487 if (USE_GUEST_BASE) {
2488 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2489 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2490 }
2491 #endif
2492
2493 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2494 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2495
2496 /*
2497 * Return path for goto_ptr. Set the return value to 0, as exit_tb does,
2498 * and fall through to the rest of the epilogue.
2499 */
2500 s->code_gen_epilogue = s->code_ptr;
2501 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2502
2503 /* TB epilogue */
2504 tb_ret_addr = s->code_ptr;
2505
2506 /* Release the stack space for TCG locals. */
2507 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2508 FRAME_SIZE - PUSH_SIZE);
2509
2510 /* Restore registers x19..x28. */
2511 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2512 int ofs = (r - TCG_REG_X19 + 2) * 8;
2513 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2514 }
2515
2516 /* Pop (FP, LR), restore SP to previous frame. */
2517 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2518 TCG_REG_SP, PUSH_SIZE, 0, 1);
2519 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2520 }
2521
2522 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2523 {
2524 int i;
2525 for (i = 0; i < count; ++i) {
2526 p[i] = NOP;
2527 }
2528 }
2529
2530 typedef struct {
2531 DebugFrameHeader h;
2532 uint8_t fde_def_cfa[4];
2533 uint8_t fde_reg_ofs[24];
2534 } DebugFrame;
2535
2536 #define ELF_HOST_MACHINE EM_AARCH64
2537
2538 static const DebugFrame debug_frame = {
2539 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2540 .h.cie.id = -1,
2541 .h.cie.version = 1,
2542 .h.cie.code_align = 1,
2543 .h.cie.data_align = 0x78, /* sleb128 -8 */
2544 .h.cie.return_column = TCG_REG_LR,
2545
2546 /* Total FDE size does not include the "len" member. */
2547 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2548
2549 .fde_def_cfa = {
2550 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2551 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2552 (FRAME_SIZE >> 7)
2553 },
2554 .fde_reg_ofs = {
2555 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2556 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2557 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2558 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2559 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2560 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2561 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2562 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2563 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2564 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2565 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2566 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2567 }
2568 };
2569
2570 void tcg_register_jit(void *buf, size_t buf_size)
2571 {
2572 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2573 }