1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74    as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
91 }
92
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94 {
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
101 }
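/* Worked example of the relocation helpers above (offsets are in 32-bit
   instruction units): B and BL keep a signed 26-bit word offset in bits
   [25:0], so reloc_pc26 reaches roughly +/-128MB from the branch, while
   conditional branches keep a signed 19-bit word offset in bits [23:5],
   so reloc_pc19 reaches roughly +/-1MB.  E.g. a target three insns after
   the branch gives offset == 3, which deposit32() writes into the field. */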
102
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
105 {
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
115 }
116 }
117
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
122
123 /* parse target specific constraints */
124 static const char *target_parse_constraint(TCGArgConstraint *ct,
125 const char *ct_str, TCGType type)
126 {
127 switch (*ct_str++) {
128 case 'r': /* general registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffffu;
131 break;
132 case 'w': /* advsimd registers */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs |= 0xffffffff00000000ull;
135 break;
136 case 'l': /* qemu_ld / qemu_st address, data_reg */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs = 0xffffffffu;
139 #ifdef CONFIG_SOFTMMU
140 /* x0 and x1 will be overwritten when reading the tlb entry,
141        and x2 and x3 are needed for the helper args, so it is better to avoid using them. */
142 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
143 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
146 #endif
147 break;
148 case 'A': /* Valid for arithmetic immediate (positive or negative). */
149 ct->ct |= TCG_CT_CONST_AIMM;
150 break;
151 case 'L': /* Valid for logical immediate. */
152 ct->ct |= TCG_CT_CONST_LIMM;
153 break;
154 case 'M': /* minus one */
155 ct->ct |= TCG_CT_CONST_MONE;
156 break;
157 case 'Z': /* zero */
158 ct->ct |= TCG_CT_CONST_ZERO;
159 break;
160 default:
161 return NULL;
162 }
163 return ct_str;
164 }
165
166 /* Match a constant valid for addition (12-bit, optionally shifted). */
167 static inline bool is_aimm(uint64_t val)
168 {
169 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
170 }
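/* E.g. 0x123 and 0x123000 are valid arithmetic immediates (the latter via
   the LSL #12 form), while 0x123456 is not and must go through a register. */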
171
172 /* Match a constant valid for logical operations. */
173 static inline bool is_limm(uint64_t val)
174 {
175 /* Taking a simplified view of the logical immediates for now, ignoring
176 the replication that can happen across the field. Match bit patterns
177 of the forms
178 0....01....1
179 0..01..10..0
180 and their inverses. */
181
182 /* Make things easier below, by testing the form with msb clear. */
183 if ((int64_t)val < 0) {
184 val = ~val;
185 }
186 if (val == 0) {
187 return false;
188 }
189 val += val & -val;
190 return (val & (val - 1)) == 0;
191 }
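/* Illustrative trace of the test above: val == 0x0ff0 (form 0..01..10..0)
   gives val & -val == 0x0010, so val becomes 0x1000, a power of two, and
   the constant is accepted.  val == 0x0f0f becomes 0x0f10, which still has
   more than one bit set, so it is rejected (two separate runs of ones). */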
192
193 /* Match a constant that is valid for vectors. */
194 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
195 {
196 int i;
197
198 *op = 0;
199 /* Match replication across 8 bits. */
200 if (v64 == dup_const(MO_8, v64)) {
201 *cmode = 0xe;
202 *imm8 = v64 & 0xff;
203 return true;
204 }
205 /* Match replication across 16 bits. */
206 if (v64 == dup_const(MO_16, v64)) {
207 uint16_t v16 = v64;
208
209 if (v16 == (v16 & 0xff)) {
210 *cmode = 0x8;
211 *imm8 = v16 & 0xff;
212 return true;
213 } else if (v16 == (v16 & 0xff00)) {
214 *cmode = 0xa;
215 *imm8 = v16 >> 8;
216 return true;
217 }
218 }
219 /* Match replication across 32 bits. */
220 if (v64 == dup_const(MO_32, v64)) {
221 uint32_t v32 = v64;
222
223 if (v32 == (v32 & 0xff)) {
224 *cmode = 0x0;
225 *imm8 = v32 & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff00)) {
228 *cmode = 0x2;
229 *imm8 = (v32 >> 8) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff0000)) {
232 *cmode = 0x4;
233 *imm8 = (v32 >> 16) & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff000000)) {
236 *cmode = 0x6;
237 *imm8 = v32 >> 24;
238 return true;
239 } else if ((v32 & 0xffff00ff) == 0xff) {
240 *cmode = 0xc;
241 *imm8 = (v32 >> 8) & 0xff;
242 return true;
243 } else if ((v32 & 0xff00ffff) == 0xffff) {
244 *cmode = 0xd;
245 *imm8 = (v32 >> 16) & 0xff;
246 return true;
247 }
248 /* Match forms of a float32. */
249 if (extract32(v32, 0, 19) == 0
250 && (extract32(v32, 25, 6) == 0x20
251 || extract32(v32, 25, 6) == 0x1f)) {
252 *cmode = 0xf;
253 *imm8 = (extract32(v32, 31, 1) << 7)
254 | (extract32(v32, 25, 1) << 6)
255 | extract32(v32, 19, 6);
256 return true;
257 }
258 }
259 /* Match forms of a float64. */
260 if (extract64(v64, 0, 48) == 0
261 && (extract64(v64, 54, 9) == 0x100
262 || extract64(v64, 54, 9) == 0x0ff)) {
263 *cmode = 0xf;
264 *op = 1;
265 *imm8 = (extract64(v64, 63, 1) << 7)
266 | (extract64(v64, 54, 1) << 6)
267 | extract64(v64, 48, 6);
268 return true;
269 }
270 /* Match bytes of 0x00 and 0xff. */
271 for (i = 0; i < 64; i += 8) {
272 uint64_t byte = extract64(v64, i, 8);
273 if (byte != 0 && byte != 0xff) {
274 break;
275 }
276 }
277 if (i == 64) {
278 *cmode = 0xe;
279 *op = 1;
280 *imm8 = (extract64(v64, 0, 1) << 0)
281 | (extract64(v64, 8, 1) << 1)
282 | (extract64(v64, 16, 1) << 2)
283 | (extract64(v64, 24, 1) << 3)
284 | (extract64(v64, 32, 1) << 4)
285 | (extract64(v64, 40, 1) << 5)
286 | (extract64(v64, 48, 1) << 6)
287 | (extract64(v64, 56, 1) << 7);
288 return true;
289 }
290 return false;
291 }
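/* A couple of illustrative matches for the encodings above:
   v64 == 0x00ff00ff00ff00ff replicates across 16 bits with v16 == 0x00ff,
   so we return cmode == 0x8, imm8 == 0xff (a 16-bit-element MOVI).
   v64 == 0x3f8000003f800000 (float32 1.0 replicated) takes the float32
   path and returns cmode == 0xf, op == 0, imm8 == 0x70, i.e. the
   single-precision vector FMOV immediate encoding of 1.0. */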
292
293 static int tcg_target_const_match(tcg_target_long val, TCGType type,
294 const TCGArgConstraint *arg_ct)
295 {
296 int ct = arg_ct->ct;
297
298 if (ct & TCG_CT_CONST) {
299 return 1;
300 }
301 if (type == TCG_TYPE_I32) {
302 val = (int32_t)val;
303 }
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
306 }
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
309 }
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
312 }
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
315 }
316
317 return 0;
318 }
319
320 enum aarch64_cond_code {
321 COND_EQ = 0x0,
322 COND_NE = 0x1,
323 COND_CS = 0x2, /* Unsigned greater or equal */
324 COND_HS = COND_CS, /* ALIAS greater or equal */
325 COND_CC = 0x3, /* Unsigned less than */
326 COND_LO = COND_CC, /* ALIAS Lower */
327 COND_MI = 0x4, /* Negative */
328 COND_PL = 0x5, /* Zero or greater */
329 COND_VS = 0x6, /* Overflow */
330 COND_VC = 0x7, /* No overflow */
331 COND_HI = 0x8, /* Unsigned greater than */
332 COND_LS = 0x9, /* Unsigned less or equal */
333 COND_GE = 0xa,
334 COND_LT = 0xb,
335 COND_GT = 0xc,
336 COND_LE = 0xd,
337 COND_AL = 0xe,
338 COND_NV = 0xf, /* behaves like COND_AL here */
339 };
340
341 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
342 [TCG_COND_EQ] = COND_EQ,
343 [TCG_COND_NE] = COND_NE,
344 [TCG_COND_LT] = COND_LT,
345 [TCG_COND_GE] = COND_GE,
346 [TCG_COND_LE] = COND_LE,
347 [TCG_COND_GT] = COND_GT,
348 /* unsigned */
349 [TCG_COND_LTU] = COND_LO,
350 [TCG_COND_GTU] = COND_HI,
351 [TCG_COND_GEU] = COND_HS,
352 [TCG_COND_LEU] = COND_LS,
353 };
354
355 typedef enum {
356 LDST_ST = 0, /* store */
357 LDST_LD = 1, /* load */
358 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
359 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
360 } AArch64LdstType;
361
362 /* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367 typedef enum {
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
371
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
374
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
378
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
383
384 /* Load literal for loading the address at pc-relative offset */
385 I3305_LDR = 0x58000000,
386 I3305_LDR_v64 = 0x5c000000,
387 I3305_LDR_v128 = 0x9c000000,
388
389 /* Load/store register. Described here as 3.3.12, but the helper
390 that emits them can transform to 3.3.10 or 3.3.13. */
391 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
392 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
393 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
394 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
395
396 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
397 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
398 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
399 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
400
401 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
402 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
403
404 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
405 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
406 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
407
408 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
409 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
410
411 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
412 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
413
414 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
415 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
416
417 I3312_TO_I3310 = 0x00200800,
418 I3312_TO_I3313 = 0x01000000,
419
420 /* Load/store register pair instructions. */
421 I3314_LDP = 0x28400000,
422 I3314_STP = 0x28000000,
423
424 /* Add/subtract immediate instructions. */
425 I3401_ADDI = 0x11000000,
426 I3401_ADDSI = 0x31000000,
427 I3401_SUBI = 0x51000000,
428 I3401_SUBSI = 0x71000000,
429
430 /* Bitfield instructions. */
431 I3402_BFM = 0x33000000,
432 I3402_SBFM = 0x13000000,
433 I3402_UBFM = 0x53000000,
434
435 /* Extract instruction. */
436 I3403_EXTR = 0x13800000,
437
438 /* Logical immediate instructions. */
439 I3404_ANDI = 0x12000000,
440 I3404_ORRI = 0x32000000,
441 I3404_EORI = 0x52000000,
442
443 /* Move wide immediate instructions. */
444 I3405_MOVN = 0x12800000,
445 I3405_MOVZ = 0x52800000,
446 I3405_MOVK = 0x72800000,
447
448 /* PC relative addressing instructions. */
449 I3406_ADR = 0x10000000,
450 I3406_ADRP = 0x90000000,
451
452 /* Add/subtract shifted register instructions (without a shift). */
453 I3502_ADD = 0x0b000000,
454 I3502_ADDS = 0x2b000000,
455 I3502_SUB = 0x4b000000,
456 I3502_SUBS = 0x6b000000,
457
458 /* Add/subtract shifted register instructions (with a shift). */
459 I3502S_ADD_LSL = I3502_ADD,
460
461 /* Add/subtract with carry instructions. */
462 I3503_ADC = 0x1a000000,
463 I3503_SBC = 0x5a000000,
464
465 /* Conditional select instructions. */
466 I3506_CSEL = 0x1a800000,
467 I3506_CSINC = 0x1a800400,
468 I3506_CSINV = 0x5a800000,
469 I3506_CSNEG = 0x5a800400,
470
471 /* Data-processing (1 source) instructions. */
472 I3507_CLZ = 0x5ac01000,
473 I3507_RBIT = 0x5ac00000,
474 I3507_REV16 = 0x5ac00400,
475 I3507_REV32 = 0x5ac00800,
476 I3507_REV64 = 0x5ac00c00,
477
478 /* Data-processing (2 source) instructions. */
479 I3508_LSLV = 0x1ac02000,
480 I3508_LSRV = 0x1ac02400,
481 I3508_ASRV = 0x1ac02800,
482 I3508_RORV = 0x1ac02c00,
483 I3508_SMULH = 0x9b407c00,
484 I3508_UMULH = 0x9bc07c00,
485 I3508_UDIV = 0x1ac00800,
486 I3508_SDIV = 0x1ac00c00,
487
488 /* Data-processing (3 source) instructions. */
489 I3509_MADD = 0x1b000000,
490 I3509_MSUB = 0x1b008000,
491
492 /* Logical shifted register instructions (without a shift). */
493 I3510_AND = 0x0a000000,
494 I3510_BIC = 0x0a200000,
495 I3510_ORR = 0x2a000000,
496 I3510_ORN = 0x2a200000,
497 I3510_EOR = 0x4a000000,
498 I3510_EON = 0x4a200000,
499 I3510_ANDS = 0x6a000000,
500
501 /* Logical shifted register instructions (with a shift). */
502 I3502S_AND_LSR = I3510_AND | (1 << 22),
503
504 /* AdvSIMD copy */
505 I3605_DUP = 0x0e000400,
506 I3605_INS = 0x4e001c00,
507 I3605_UMOV = 0x0e003c00,
508
509 /* AdvSIMD modified immediate */
510 I3606_MOVI = 0x0f000400,
511
512 /* AdvSIMD shift by immediate */
513 I3614_SSHR = 0x0f000400,
514 I3614_SSRA = 0x0f001400,
515 I3614_SHL = 0x0f005400,
516 I3614_USHR = 0x2f000400,
517 I3614_USRA = 0x2f001400,
518
519 /* AdvSIMD three same. */
520 I3616_ADD = 0x0e208400,
521 I3616_AND = 0x0e201c00,
522 I3616_BIC = 0x0e601c00,
523 I3616_EOR = 0x2e201c00,
524 I3616_MUL = 0x0e209c00,
525 I3616_ORR = 0x0ea01c00,
526 I3616_ORN = 0x0ee01c00,
527 I3616_SUB = 0x2e208400,
528 I3616_CMGT = 0x0e203400,
529 I3616_CMGE = 0x0e203c00,
530 I3616_CMTST = 0x0e208c00,
531 I3616_CMHI = 0x2e203400,
532 I3616_CMHS = 0x2e203c00,
533 I3616_CMEQ = 0x2e208c00,
534 I3616_SMAX = 0x0e206400,
535 I3616_SMIN = 0x0e206c00,
536 I3616_SQADD = 0x0e200c00,
537 I3616_SQSUB = 0x0e202c00,
538 I3616_UMAX = 0x2e206400,
539 I3616_UMIN = 0x2e206c00,
540 I3616_UQADD = 0x2e200c00,
541 I3616_UQSUB = 0x2e202c00,
542
543 /* AdvSIMD two-reg misc. */
544 I3617_CMGT0 = 0x0e208800,
545 I3617_CMEQ0 = 0x0e209800,
546 I3617_CMLT0 = 0x0e20a800,
547 I3617_CMGE0 = 0x2e208800,
548 I3617_CMLE0 = 0x2e20a800,
549 I3617_NOT = 0x2e205800,
550 I3617_NEG = 0x2e20b800,
551
552 /* System instructions. */
553 NOP = 0xd503201f,
554 DMB_ISH = 0xd50338bf,
555 DMB_LD = 0x00000100,
556 DMB_ST = 0x00000200,
557 } AArch64Insn;
558
559 static inline uint32_t tcg_in32(TCGContext *s)
560 {
561 uint32_t v = *(uint32_t *)s->code_ptr;
562 return v;
563 }
564
565 /* Emit an opcode with "type-checking" of the format. */
566 #define tcg_out_insn(S, FMT, OP, ...) \
567 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
568
569 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
570 {
571 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
572 }
573
574 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
575 TCGReg rt, int imm19)
576 {
577 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
578 }
579
580 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
581 TCGCond c, int imm19)
582 {
583 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
584 }
585
586 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
587 {
588 tcg_out32(s, insn | (imm26 & 0x03ffffff));
589 }
590
591 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
592 {
593 tcg_out32(s, insn | rn << 5);
594 }
595
596 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
597 TCGReg r1, TCGReg r2, TCGReg rn,
598 tcg_target_long ofs, bool pre, bool w)
599 {
600 insn |= 1u << 31; /* ext */
601 insn |= pre << 24;
602 insn |= w << 23;
603
604 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
605 insn |= (ofs & (0x7f << 3)) << (15 - 3);
606
607 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
608 }
609
610 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
611 TCGReg rd, TCGReg rn, uint64_t aimm)
612 {
613 if (aimm > 0xfff) {
614 tcg_debug_assert((aimm & 0xfff) == 0);
615 aimm >>= 12;
616 tcg_debug_assert(aimm <= 0xfff);
617 aimm |= 1 << 12; /* apply LSL 12 */
618 }
619 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
620 }
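/* E.g. aimm == 0x123000 is rewritten above to 0x123 with bit 12 set,
   selecting the shifted form: ADD Rd, Rn, #0x123, LSL #12. */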
621
622 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
623 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
624 that feed the DecodeBitMasks pseudo function. */
625 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
626 TCGReg rd, TCGReg rn, int n, int immr, int imms)
627 {
628 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
629 | rn << 5 | rd);
630 }
631
632 #define tcg_out_insn_3404 tcg_out_insn_3402
633
634 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
635 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
636 {
637 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
638 | rn << 5 | rd);
639 }
640
641 /* This function is used for the Move (wide immediate) instruction group.
642 Note that SHIFT is a full shift count, not the 2 bit HW field. */
643 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
644 TCGReg rd, uint16_t half, unsigned shift)
645 {
646 tcg_debug_assert((shift & ~0x30) == 0);
647 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
648 }
649
650 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
651 TCGReg rd, int64_t disp)
652 {
653 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
654 }
655
656 /* This function is for 3.5.2 (Add/subtract shifted register) and 3.5.10
657    (Logical shifted register), for the rare occasion when we actually want
       to supply a shift amount. */
658 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
659 TCGType ext, TCGReg rd, TCGReg rn,
660 TCGReg rm, int imm6)
661 {
662 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
663 }
664
665 /* This function is for 3.5.2 (Add/subtract shifted register),
666    and 3.5.10 (Logical shifted register), for the vast majority of cases
667 when we don't want to apply a shift. Thus it can also be used for
668 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
669 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
670 TCGReg rd, TCGReg rn, TCGReg rm)
671 {
672 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
673 }
674
675 #define tcg_out_insn_3503 tcg_out_insn_3502
676 #define tcg_out_insn_3508 tcg_out_insn_3502
677 #define tcg_out_insn_3510 tcg_out_insn_3502
678
679 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
680 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
681 {
682 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
683 | tcg_cond_to_aarch64[c] << 12);
684 }
685
686 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
687 TCGReg rd, TCGReg rn)
688 {
689 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
690 }
691
692 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
693 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
694 {
695 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
696 }
697
698 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
699 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
700 {
701 /* Note that bit 11 set means general register input. Therefore
702 we can handle both register sets with one function. */
703 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
704 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
705 }
706
707 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
708 TCGReg rd, bool op, int cmode, uint8_t imm8)
709 {
710 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
711 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
712 }
713
714 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
715 TCGReg rd, TCGReg rn, unsigned immhb)
716 {
717 tcg_out32(s, insn | q << 30 | immhb << 16
718 | (rn & 0x1f) << 5 | (rd & 0x1f));
719 }
720
721 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
722 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
723 {
724 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
725 | (rn & 0x1f) << 5 | (rd & 0x1f));
726 }
727
728 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
729 unsigned size, TCGReg rd, TCGReg rn)
730 {
731 tcg_out32(s, insn | q << 30 | (size << 22)
732 | (rn & 0x1f) << 5 | (rd & 0x1f));
733 }
734
735 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
736 TCGReg rd, TCGReg base, TCGType ext,
737 TCGReg regoff)
738 {
739 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
740 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
741 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
742 }
743
744 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
745 TCGReg rd, TCGReg rn, intptr_t offset)
746 {
747 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
748 }
749
750 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
751 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
752 {
753 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
754 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
755 | rn << 5 | (rd & 0x1f));
756 }
757
758 /* Register to register move using ORR (shifted register with no shift). */
759 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
760 {
761 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
762 }
763
764 /* Register to register move using ADDI (move to/from SP). */
765 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
766 {
767 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
768 }
769
770 /* This function is used for the Logical (immediate) instruction group.
771 The value of LIMM must satisfy IS_LIMM. See the comment above about
772 only supporting simplified logical immediates. */
773 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
774 TCGReg rd, TCGReg rn, uint64_t limm)
775 {
776 unsigned h, l, r, c;
777
778 tcg_debug_assert(is_limm(limm));
779
780 h = clz64(limm);
781 l = ctz64(limm);
782 if (l == 0) {
783 r = 0; /* form 0....01....1 */
784 c = ctz64(~limm) - 1;
785 if (h == 0) {
786 r = clz64(~limm); /* form 1..10..01..1 */
787 c += r;
788 }
789 } else {
790 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
791 c = r - h - 1;
792 }
793 if (ext == TCG_TYPE_I32) {
794 r &= 31;
795 c &= 31;
796 }
797
798 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
799 }
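/* Worked example for the encoding above: limm == 0x00ff0000 with
   TCG_TYPE_I64 has h == 40 and l == 16, so r == 64 - 16 == 48 and
   c == 48 - 40 - 1 == 7, i.e. a run of c + 1 == 8 ones rotated right by
   48 bits, which DecodeBitMasks expands back to 0x0000000000ff0000. */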
800
801 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
802 TCGReg rd, tcg_target_long v64)
803 {
804 int op, cmode, imm8;
805
806 if (is_fimm(v64, &op, &cmode, &imm8)) {
807 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
808 } else if (type == TCG_TYPE_V128) {
809 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
810 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
811 } else {
812 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
813 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
814 }
815 }
816
817 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
818 TCGReg rd, TCGReg rs)
819 {
820 int is_q = type - TCG_TYPE_V64;
821 tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
822 return true;
823 }
824
825 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
826 tcg_target_long value)
827 {
828 tcg_target_long svalue = value;
829 tcg_target_long ivalue = ~value;
830 tcg_target_long t0, t1, t2;
831 int s0, s1;
832 AArch64Insn opc;
833
834 switch (type) {
835 case TCG_TYPE_I32:
836 case TCG_TYPE_I64:
837 tcg_debug_assert(rd < 32);
838 break;
839
840 case TCG_TYPE_V64:
841 case TCG_TYPE_V128:
842 tcg_debug_assert(rd >= 32);
843 tcg_out_dupi_vec(s, type, rd, value);
844 return;
845
846 default:
847 g_assert_not_reached();
848 }
849
850 /* For 32-bit values, discard potential garbage in value. For 64-bit
851 values within [2**31, 2**32-1], we can create smaller sequences by
852 interpreting this as a negative 32-bit number, while ensuring that
853 the high 32 bits are cleared by setting SF=0. */
854 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
855 svalue = (int32_t)value;
856 value = (uint32_t)value;
857 ivalue = (uint32_t)ivalue;
858 type = TCG_TYPE_I32;
859 }
860
861 /* Speed things up by handling the common case of small positive
862 and negative values specially. */
863 if ((value & ~0xffffull) == 0) {
864 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
865 return;
866 } else if ((ivalue & ~0xffffull) == 0) {
867 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
868 return;
869 }
870
871 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
872 use the sign-extended value. That lets us match rotated values such
873 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
874 if (is_limm(svalue)) {
875 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
876 return;
877 }
878
879 /* Look for host pointer values within 4G of the PC. This happens
880 often when loading pointers to QEMU's own data structures. */
881 if (type == TCG_TYPE_I64) {
882 tcg_target_long disp = value - (intptr_t)s->code_ptr;
883 if (disp == sextract64(disp, 0, 21)) {
884 tcg_out_insn(s, 3406, ADR, rd, disp);
885 return;
886 }
887 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
888 if (disp == sextract64(disp, 0, 21)) {
889 tcg_out_insn(s, 3406, ADRP, rd, disp);
890 if (value & 0xfff) {
891 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
892 }
893 return;
894 }
895 }
896
897 /* Would it take fewer insns to begin with MOVN? */
898 if (ctpop64(value) >= 32) {
899 t0 = ivalue;
900 opc = I3405_MOVN;
901 } else {
902 t0 = value;
903 opc = I3405_MOVZ;
904 }
905 s0 = ctz64(t0) & (63 & -16);
906 t1 = t0 & ~(0xffffUL << s0);
907 s1 = ctz64(t1) & (63 & -16);
908 t2 = t1 & ~(0xffffUL << s1);
909 if (t2 == 0) {
910 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
911 if (t1 != 0) {
912 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
913 }
914 return;
915 }
916
917 /* For more than 2 insns, dump it into the constant pool. */
918 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
919 tcg_out_insn(s, 3305, LDR, 0, rd);
920 }
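/* Example of the final MOVZ/MOVK path above: tcg_out_movi(s, TCG_TYPE_I64,
   rd, 0x12345678) narrows to TCG_TYPE_I32 (high half clear), fails the
   single-insn MOVZ/MOVN and logical-immediate tests, and ends up emitting
       movz wRd, #0x5678
       movk wRd, #0x1234, lsl #16
   Constants needing three or more 16-bit pieces (and not reachable via
   ADR/ADRP) fall through to the constant pool instead. */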
921
922 /* Define something more legible for general use. */
923 #define tcg_out_ldst_r tcg_out_insn_3310
924
925 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
926 TCGReg rn, intptr_t offset, int lgsize)
927 {
928 /* If the offset is naturally aligned and in range, then we can
929 use the scaled uimm12 encoding */
930 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
931 uintptr_t scaled_uimm = offset >> lgsize;
932 if (scaled_uimm <= 0xfff) {
933 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
934 return;
935 }
936 }
937
938 /* Small signed offsets can use the unscaled encoding. */
939 if (offset >= -256 && offset < 256) {
940 tcg_out_insn_3312(s, insn, rd, rn, offset);
941 return;
942 }
943
944 /* Worst-case scenario, move offset to temp register, use reg offset. */
945 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
946 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
947 }
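/* The three addressing forms above, for an 8-byte access (lgsize == 3):
   offset 0x1008 is aligned and small enough, so it uses the scaled uimm12
   form (scaled_uimm == 0x201); offset -8 uses the unscaled 9-bit signed
   form; something like offset 0x123456 is materialized into TCG_REG_TMP
   and uses the register-offset form. */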
948
949 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
950 {
951 if (ret == arg) {
952 return true;
953 }
954 switch (type) {
955 case TCG_TYPE_I32:
956 case TCG_TYPE_I64:
957 if (ret < 32 && arg < 32) {
958 tcg_out_movr(s, type, ret, arg);
959 break;
960 } else if (ret < 32) {
961 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
962 break;
963 } else if (arg < 32) {
964 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
965 break;
966 }
967 /* FALLTHRU */
968
969 case TCG_TYPE_V64:
970 tcg_debug_assert(ret >= 32 && arg >= 32);
971 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
972 break;
973 case TCG_TYPE_V128:
974 tcg_debug_assert(ret >= 32 && arg >= 32);
975 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
976 break;
977
978 default:
979 g_assert_not_reached();
980 }
981 return true;
982 }
983
984 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
985 TCGReg base, intptr_t ofs)
986 {
987 AArch64Insn insn;
988 int lgsz;
989
990 switch (type) {
991 case TCG_TYPE_I32:
992 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
993 lgsz = 2;
994 break;
995 case TCG_TYPE_I64:
996 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
997 lgsz = 3;
998 break;
999 case TCG_TYPE_V64:
1000 insn = I3312_LDRVD;
1001 lgsz = 3;
1002 break;
1003 case TCG_TYPE_V128:
1004 insn = I3312_LDRVQ;
1005 lgsz = 4;
1006 break;
1007 default:
1008 g_assert_not_reached();
1009 }
1010 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1011 }
1012
1013 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1014 TCGReg base, intptr_t ofs)
1015 {
1016 AArch64Insn insn;
1017 int lgsz;
1018
1019 switch (type) {
1020 case TCG_TYPE_I32:
1021 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1022 lgsz = 2;
1023 break;
1024 case TCG_TYPE_I64:
1025 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1026 lgsz = 3;
1027 break;
1028 case TCG_TYPE_V64:
1029 insn = I3312_STRVD;
1030 lgsz = 3;
1031 break;
1032 case TCG_TYPE_V128:
1033 insn = I3312_STRVQ;
1034 lgsz = 4;
1035 break;
1036 default:
1037 g_assert_not_reached();
1038 }
1039 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1040 }
1041
1042 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1043 TCGReg base, intptr_t ofs)
1044 {
1045 if (type <= TCG_TYPE_I64 && val == 0) {
1046 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1047 return true;
1048 }
1049 return false;
1050 }
1051
1052 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1053 TCGReg rn, unsigned int a, unsigned int b)
1054 {
1055 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1056 }
1057
1058 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1059 TCGReg rn, unsigned int a, unsigned int b)
1060 {
1061 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1062 }
1063
1064 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1065 TCGReg rn, unsigned int a, unsigned int b)
1066 {
1067 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1068 }
1069
1070 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1071 TCGReg rn, TCGReg rm, unsigned int a)
1072 {
1073 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1074 }
1075
1076 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1077 TCGReg rd, TCGReg rn, unsigned int m)
1078 {
1079 int bits = ext ? 64 : 32;
1080 int max = bits - 1;
1081 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1082 }
1083
1084 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1085 TCGReg rd, TCGReg rn, unsigned int m)
1086 {
1087 int max = ext ? 63 : 31;
1088 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1089 }
1090
1091 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1092 TCGReg rd, TCGReg rn, unsigned int m)
1093 {
1094 int max = ext ? 63 : 31;
1095 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1096 }
1097
1098 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1099 TCGReg rd, TCGReg rn, unsigned int m)
1100 {
1101 int max = ext ? 63 : 31;
1102 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1103 }
1104
1105 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1106 TCGReg rd, TCGReg rn, unsigned int m)
1107 {
1108 int bits = ext ? 64 : 32;
1109 int max = bits - 1;
1110 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1111 }
1112
1113 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1114 TCGReg rn, unsigned lsb, unsigned width)
1115 {
1116 unsigned size = ext ? 64 : 32;
1117 unsigned a = (size - lsb) & (size - 1);
1118 unsigned b = width - 1;
1119 tcg_out_bfm(s, ext, rd, rn, a, b);
1120 }
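/* E.g. a 32-bit deposit at lsb == 8 of width == 8 computes a == 24 and
   b == 7, i.e. BFM Wd, Wn, #24, #7, which is the BFI Wd, Wn, #8, #8 alias. */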
1121
1122 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1123 tcg_target_long b, bool const_b)
1124 {
1125 if (const_b) {
1126 /* Using CMP or CMN aliases. */
1127 if (b >= 0) {
1128 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1129 } else {
1130 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1131 }
1132 } else {
1133 /* Using CMP alias SUBS wzr, Wn, Wm */
1134 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1135 }
1136 }
1137
1138 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1139 {
1140 ptrdiff_t offset = target - s->code_ptr;
1141 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1142 tcg_out_insn(s, 3206, B, offset);
1143 }
1144
1145 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1146 {
1147 ptrdiff_t offset = target - s->code_ptr;
1148 if (offset == sextract64(offset, 0, 26)) {
1149 tcg_out_insn(s, 3206, BL, offset);
1150 } else {
1151 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1152 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1153 }
1154 }
1155
1156 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1157 {
1158 tcg_out_insn(s, 3207, BLR, reg);
1159 }
1160
1161 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1162 {
1163 ptrdiff_t offset = target - s->code_ptr;
1164 if (offset == sextract64(offset, 0, 26)) {
1165 tcg_out_insn(s, 3206, BL, offset);
1166 } else {
1167 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1168 tcg_out_callr(s, TCG_REG_TMP);
1169 }
1170 }
1171
1172 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1173 uintptr_t addr)
1174 {
1175 tcg_insn_unit i1, i2;
1176 TCGType rt = TCG_TYPE_I64;
1177 TCGReg rd = TCG_REG_TMP;
1178 uint64_t pair;
1179
1180 ptrdiff_t offset = addr - jmp_addr;
1181
1182 if (offset == sextract64(offset, 0, 26)) {
1183 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1184 i2 = NOP;
1185 } else {
1186 offset = (addr >> 12) - (jmp_addr >> 12);
1187
1188 /* patch ADRP */
1189 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1190 /* patch ADDI */
1191 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1192 }
1193 pair = (uint64_t)i2 << 32 | i1;
1194 atomic_set((uint64_t *)jmp_addr, pair);
1195 flush_icache_range(jmp_addr, jmp_addr + 8);
1196 }
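/* Illustrative result of the patching above: if addr - jmp_addr fits the
   signed 26-bit check, the insn pair becomes "B target; NOP"; otherwise it
   becomes "ADRP TMP, target_page; ADD TMP, TMP, #lo12" and the BR TMP that
   follows it (emitted by goto_tb) jumps through the temporary.  Writing
   both words with one atomic 64-bit store keeps concurrently executing
   translated code from ever seeing a half-patched pair. */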
1197
1198 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1199 {
1200 if (!l->has_value) {
1201 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1202 tcg_out_insn(s, 3206, B, 0);
1203 } else {
1204 tcg_out_goto(s, l->u.value_ptr);
1205 }
1206 }
1207
1208 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1209 TCGArg b, bool b_const, TCGLabel *l)
1210 {
1211 intptr_t offset;
1212 bool need_cmp;
1213
1214 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1215 need_cmp = false;
1216 } else {
1217 need_cmp = true;
1218 tcg_out_cmp(s, ext, a, b, b_const);
1219 }
1220
1221 if (!l->has_value) {
1222 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1223 offset = tcg_in32(s) >> 5;
1224 } else {
1225 offset = l->u.value_ptr - s->code_ptr;
1226 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1227 }
1228
1229 if (need_cmp) {
1230 tcg_out_insn(s, 3202, B_C, c, offset);
1231 } else if (c == TCG_COND_EQ) {
1232 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1233 } else {
1234 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1235 }
1236 }
1237
1238 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1239 {
1240 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1241 }
1242
1243 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1244 {
1245 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1246 }
1247
1248 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1249 {
1250 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1251 }
1252
1253 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1254 TCGReg rd, TCGReg rn)
1255 {
1256 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1257 int bits = (8 << s_bits) - 1;
1258 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1259 }
1260
1261 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1262 TCGReg rd, TCGReg rn)
1263 {
1264 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1265 int bits = (8 << s_bits) - 1;
1266 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1267 }
1268
1269 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1270 TCGReg rn, int64_t aimm)
1271 {
1272 if (aimm >= 0) {
1273 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1274 } else {
1275 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1276 }
1277 }
1278
1279 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1280 TCGReg rh, TCGReg al, TCGReg ah,
1281 tcg_target_long bl, tcg_target_long bh,
1282 bool const_bl, bool const_bh, bool sub)
1283 {
1284 TCGReg orig_rl = rl;
1285 AArch64Insn insn;
1286
1287 if (rl == ah || (!const_bh && rl == bh)) {
1288 rl = TCG_REG_TMP;
1289 }
1290
1291 if (const_bl) {
1292 insn = I3401_ADDSI;
1293 if ((bl < 0) ^ sub) {
1294 insn = I3401_SUBSI;
1295 bl = -bl;
1296 }
1297 if (unlikely(al == TCG_REG_XZR)) {
1298 /* ??? We want to allow al to be zero for the benefit of
1299 negation via subtraction. However, that leaves open the
1300 possibility of adding 0+const in the low part, and the
1301 immediate add instructions encode XSP not XZR. Don't try
1302 anything more elaborate here than loading another zero. */
1303 al = TCG_REG_TMP;
1304 tcg_out_movi(s, ext, al, 0);
1305 }
1306 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1307 } else {
1308 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1309 }
1310
1311 insn = I3503_ADC;
1312 if (const_bh) {
1313 /* Note that the only two constants we support are 0 and -1, and
1314 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1315 if ((bh != 0) ^ sub) {
1316 insn = I3503_SBC;
1317 }
1318 bh = TCG_REG_XZR;
1319 } else if (sub) {
1320 insn = I3503_SBC;
1321 }
1322 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1323
1324 tcg_out_mov(s, ext, orig_rl, rl);
1325 }
1326
1327 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1328 {
1329 static const uint32_t sync[] = {
1330 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1331 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1332 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1333 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1334 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1335 };
1336 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1337 }
1338
1339 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1340 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1341 {
1342 TCGReg a1 = a0;
1343 if (is_ctz) {
1344 a1 = TCG_REG_TMP;
1345 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1346 }
1347 if (const_b && b == (ext ? 64 : 32)) {
1348 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1349 } else {
1350 AArch64Insn sel = I3506_CSEL;
1351
1352 tcg_out_cmp(s, ext, a0, 0, 1);
1353 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1354
1355 if (const_b) {
1356 if (b == -1) {
1357 b = TCG_REG_XZR;
1358 sel = I3506_CSINV;
1359 } else if (b == 0) {
1360 b = TCG_REG_XZR;
1361 } else {
1362 tcg_out_movi(s, ext, d, b);
1363 b = d;
1364 }
1365 }
1366 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1367 }
1368 }
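/* E.g. a 32-bit ctz with the constant default b == 32 reduces above to
   "RBIT Wtmp, Wa0; CLZ Wd, Wtmp"; a clz with any other default goes through
   CMP + CLZ + CSEL/CSINV so that the default is selected when a0 was zero. */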
1369
1370 #ifdef CONFIG_SOFTMMU
1371 #include "tcg-ldst.inc.c"
1372
1373 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1374 * TCGMemOpIdx oi, uintptr_t ra)
1375 */
1376 static void * const qemu_ld_helpers[16] = {
1377 [MO_UB] = helper_ret_ldub_mmu,
1378 [MO_LEUW] = helper_le_lduw_mmu,
1379 [MO_LEUL] = helper_le_ldul_mmu,
1380 [MO_LEQ] = helper_le_ldq_mmu,
1381 [MO_BEUW] = helper_be_lduw_mmu,
1382 [MO_BEUL] = helper_be_ldul_mmu,
1383 [MO_BEQ] = helper_be_ldq_mmu,
1384 };
1385
1386 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1387 * uintxx_t val, TCGMemOpIdx oi,
1388 * uintptr_t ra)
1389 */
1390 static void * const qemu_st_helpers[16] = {
1391 [MO_UB] = helper_ret_stb_mmu,
1392 [MO_LEUW] = helper_le_stw_mmu,
1393 [MO_LEUL] = helper_le_stl_mmu,
1394 [MO_LEQ] = helper_le_stq_mmu,
1395 [MO_BEUW] = helper_be_stw_mmu,
1396 [MO_BEUL] = helper_be_stl_mmu,
1397 [MO_BEQ] = helper_be_stq_mmu,
1398 };
1399
1400 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1401 {
1402 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1403 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1404 tcg_out_insn(s, 3406, ADR, rd, offset);
1405 }
1406
1407 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1408 {
1409 TCGMemOpIdx oi = lb->oi;
1410 TCGMemOp opc = get_memop(oi);
1411 TCGMemOp size = opc & MO_SIZE;
1412
1413 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1414 return false;
1415 }
1416
1417 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1418 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1419 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1420 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1421 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1422 if (opc & MO_SIGN) {
1423 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1424 } else {
1425 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1426 }
1427
1428 tcg_out_goto(s, lb->raddr);
1429 return true;
1430 }
1431
1432 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1433 {
1434 TCGMemOpIdx oi = lb->oi;
1435 TCGMemOp opc = get_memop(oi);
1436 TCGMemOp size = opc & MO_SIZE;
1437
1438 if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1439 return false;
1440 }
1441
1442 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1443 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1444 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1445 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1446 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1447 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1448 tcg_out_goto(s, lb->raddr);
1449 return true;
1450 }
1451
1452 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1453 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1454 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1455 {
1456 TCGLabelQemuLdst *label = new_ldst_label(s);
1457
1458 label->is_ld = is_ld;
1459 label->oi = oi;
1460 label->type = ext;
1461 label->datalo_reg = data_reg;
1462 label->addrlo_reg = addr_reg;
1463 label->raddr = raddr;
1464 label->label_ptr[0] = label_ptr;
1465 }
1466
1467 /* We expect tlb_mask to be before tlb_table. */
1468 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
1469 offsetof(CPUArchState, tlb_mask));
1470
1471 /* We expect to use a 24-bit unsigned offset from ENV. */
1472 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
1473 > 0xffffff);
1474
1475 /* Load and compare a TLB entry, emitting the conditional jump to the
1476 slow path for the failure case, which will be patched later when finalizing
1477 the slow path. Generated code returns the host addend in X1,
1478 clobbers X0,X2,X3,TMP. */
1479 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1480 tcg_insn_unit **label_ptr, int mem_index,
1481 bool is_read)
1482 {
1483 int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]);
1484 int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]);
1485 unsigned a_bits = get_alignment_bits(opc);
1486 unsigned s_bits = opc & MO_SIZE;
1487 unsigned a_mask = (1u << a_bits) - 1;
1488 unsigned s_mask = (1u << s_bits) - 1;
1489 TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0, x3;
1490 TCGType mask_type;
1491 uint64_t compare_mask;
1492
1493 if (table_ofs > 0xfff) {
1494 int table_hi = table_ofs & ~0xfff;
1495 int mask_hi = mask_ofs & ~0xfff;
1496
1497 table_base = TCG_REG_X1;
1498 if (mask_hi == table_hi) {
1499 mask_base = table_base;
1500 } else if (mask_hi) {
1501 mask_base = TCG_REG_X0;
1502 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1503 mask_base, TCG_AREG0, mask_hi);
1504 }
1505 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64,
1506 table_base, TCG_AREG0, table_hi);
1507 mask_ofs -= mask_hi;
1508 table_ofs -= table_hi;
1509 }
1510
1511 mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1512 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1513
1514 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
1515 tcg_out_ld(s, mask_type, TCG_REG_X0, mask_base, mask_ofs);
1516 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, table_base, table_ofs);
1517
1518 /* Extract the TLB index from the address into X0. */
1519 tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1520 TCG_REG_X0, TCG_REG_X0, addr_reg,
1521 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1522
1523 /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
1524 tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1525
1526 /* Load the tlb comparator into X0, and the fast path addend into X1. */
1527 tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1528 ? offsetof(CPUTLBEntry, addr_read)
1529 : offsetof(CPUTLBEntry, addr_write));
1530 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1531 offsetof(CPUTLBEntry, addend));
1532
1533 /* For aligned accesses, we check the first byte and include the alignment
1534 bits within the address. For unaligned access, we check that we don't
1535 cross pages using the address of the last byte of the access. */
1536 if (a_bits >= s_bits) {
1537 x3 = addr_reg;
1538 } else {
1539 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1540 TCG_REG_X3, addr_reg, s_mask - a_mask);
1541 x3 = TCG_REG_X3;
1542 }
1543 compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1544
1545 /* Store the page mask part of the address into X3. */
1546 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1547 TCG_REG_X3, x3, compare_mask);
1548
1549 /* Perform the address comparison. */
1550 tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1551
1552 /* If not equal, we jump to the slow path. */
1553 *label_ptr = s->code_ptr;
1554 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1555 }
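/* Rough sketch of the host code emitted by tcg_out_tlb_read above
   (plus an extra ADDI or two when the TLB offsets exceed 12 bits):
       ldr  {w0|x0}, [env, #tlb_mask[mmu_idx]]
       ldr  x1, [env, #tlb_table[mmu_idx]]
       and  x0, x0, addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
       add  x1, x1, x0                     // &tlb_table[mmu_idx][index]
       ldr  x0, [x1, #addr_read/write]     // tlb comparator
       ldr  x1, [x1, #addend]              // host addend for the fast path
       add  x3, addr, #(s_mask - a_mask)   // only for under-aligned accesses
       and  x3, x3, #(TARGET_PAGE_MASK | a_mask)
       cmp  x0, x3
       b.ne slow_path                      // patched via label_ptr later */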
1556
1557 #endif /* CONFIG_SOFTMMU */
1558
1559 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1560 TCGReg data_r, TCGReg addr_r,
1561 TCGType otype, TCGReg off_r)
1562 {
1563 const TCGMemOp bswap = memop & MO_BSWAP;
1564
1565 switch (memop & MO_SSIZE) {
1566 case MO_UB:
1567 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1568 break;
1569 case MO_SB:
1570 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1571 data_r, addr_r, otype, off_r);
1572 break;
1573 case MO_UW:
1574 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1575 if (bswap) {
1576 tcg_out_rev16(s, data_r, data_r);
1577 }
1578 break;
1579 case MO_SW:
1580 if (bswap) {
1581 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1582 tcg_out_rev16(s, data_r, data_r);
1583 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1584 } else {
1585 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1586 data_r, addr_r, otype, off_r);
1587 }
1588 break;
1589 case MO_UL:
1590 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1591 if (bswap) {
1592 tcg_out_rev32(s, data_r, data_r);
1593 }
1594 break;
1595 case MO_SL:
1596 if (bswap) {
1597 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1598 tcg_out_rev32(s, data_r, data_r);
1599 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1600 } else {
1601 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1602 }
1603 break;
1604 case MO_Q:
1605 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1606 if (bswap) {
1607 tcg_out_rev64(s, data_r, data_r);
1608 }
1609 break;
1610 default:
1611 tcg_abort();
1612 }
1613 }
1614
1615 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1616 TCGReg data_r, TCGReg addr_r,
1617 TCGType otype, TCGReg off_r)
1618 {
1619 const TCGMemOp bswap = memop & MO_BSWAP;
1620
1621 switch (memop & MO_SIZE) {
1622 case MO_8:
1623 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1624 break;
1625 case MO_16:
1626 if (bswap && data_r != TCG_REG_XZR) {
1627 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1628 data_r = TCG_REG_TMP;
1629 }
1630 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1631 break;
1632 case MO_32:
1633 if (bswap && data_r != TCG_REG_XZR) {
1634 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1635 data_r = TCG_REG_TMP;
1636 }
1637 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1638 break;
1639 case MO_64:
1640 if (bswap && data_r != TCG_REG_XZR) {
1641 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1642 data_r = TCG_REG_TMP;
1643 }
1644 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1645 break;
1646 default:
1647 tcg_abort();
1648 }
1649 }
1650
1651 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1652 TCGMemOpIdx oi, TCGType ext)
1653 {
1654 TCGMemOp memop = get_memop(oi);
1655 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1656 #ifdef CONFIG_SOFTMMU
1657 unsigned mem_index = get_mmuidx(oi);
1658 tcg_insn_unit *label_ptr;
1659
1660 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1661 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1662 TCG_REG_X1, otype, addr_reg);
1663 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1664 s->code_ptr, label_ptr);
1665 #else /* !CONFIG_SOFTMMU */
1666 if (USE_GUEST_BASE) {
1667 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1668 TCG_REG_GUEST_BASE, otype, addr_reg);
1669 } else {
1670 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1671 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1672 }
1673 #endif /* CONFIG_SOFTMMU */
1674 }
1675
1676 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1677 TCGMemOpIdx oi)
1678 {
1679 TCGMemOp memop = get_memop(oi);
1680 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1681 #ifdef CONFIG_SOFTMMU
1682 unsigned mem_index = get_mmuidx(oi);
1683 tcg_insn_unit *label_ptr;
1684
1685 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1686 tcg_out_qemu_st_direct(s, memop, data_reg,
1687 TCG_REG_X1, otype, addr_reg);
1688 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1689 data_reg, addr_reg, s->code_ptr, label_ptr);
1690 #else /* !CONFIG_SOFTMMU */
1691 if (USE_GUEST_BASE) {
1692 tcg_out_qemu_st_direct(s, memop, data_reg,
1693 TCG_REG_GUEST_BASE, otype, addr_reg);
1694 } else {
1695 tcg_out_qemu_st_direct(s, memop, data_reg,
1696 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1697 }
1698 #endif /* CONFIG_SOFTMMU */
1699 }
1700
1701 static tcg_insn_unit *tb_ret_addr;
1702
1703 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1704 const TCGArg args[TCG_MAX_OP_ARGS],
1705 const int const_args[TCG_MAX_OP_ARGS])
1706 {
1707 /* 99% of the time, we can signal the use of extension registers
1708 by looking to see if the opcode handles 64-bit data. */
1709 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1710
1711 /* Hoist the loads of the most common arguments. */
1712 TCGArg a0 = args[0];
1713 TCGArg a1 = args[1];
1714 TCGArg a2 = args[2];
1715 int c2 = const_args[2];
1716
1717 /* Some operands are defined with "rZ" constraint, a register or
1718 the zero register. These need not actually test args[I] == 0. */
1719 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1720
1721 switch (opc) {
1722 case INDEX_op_exit_tb:
1723 /* Reuse the zeroing that exists for goto_ptr. */
1724 if (a0 == 0) {
1725 tcg_out_goto_long(s, s->code_gen_epilogue);
1726 } else {
1727 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1728 tcg_out_goto_long(s, tb_ret_addr);
1729 }
1730 break;
1731
1732 case INDEX_op_goto_tb:
1733 if (s->tb_jmp_insn_offset != NULL) {
1734 /* TCG_TARGET_HAS_direct_jump */
1735 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1736 write can be used to patch the target address. */
1737 if ((uintptr_t)s->code_ptr & 7) {
1738 tcg_out32(s, NOP);
1739 }
1740 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1741 /* actual branch destination will be patched by
1742 tb_target_set_jmp_target later. */
1743 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1744 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1745 } else {
1746 /* !TCG_TARGET_HAS_direct_jump */
1747 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1748 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1749 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1750 }
1751 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1752 set_jmp_reset_offset(s, a0);
1753 break;
1754
1755 case INDEX_op_goto_ptr:
1756 tcg_out_insn(s, 3207, BR, a0);
1757 break;
1758
1759 case INDEX_op_br:
1760 tcg_out_goto_label(s, arg_label(a0));
1761 break;
1762
1763 case INDEX_op_ld8u_i32:
1764 case INDEX_op_ld8u_i64:
1765 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1766 break;
1767 case INDEX_op_ld8s_i32:
1768 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1769 break;
1770 case INDEX_op_ld8s_i64:
1771 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1772 break;
1773 case INDEX_op_ld16u_i32:
1774 case INDEX_op_ld16u_i64:
1775 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1776 break;
1777 case INDEX_op_ld16s_i32:
1778 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1779 break;
1780 case INDEX_op_ld16s_i64:
1781 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1782 break;
1783 case INDEX_op_ld_i32:
1784 case INDEX_op_ld32u_i64:
1785 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1786 break;
1787 case INDEX_op_ld32s_i64:
1788 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1789 break;
1790 case INDEX_op_ld_i64:
1791 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1792 break;
1793
1794 case INDEX_op_st8_i32:
1795 case INDEX_op_st8_i64:
1796 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1797 break;
1798 case INDEX_op_st16_i32:
1799 case INDEX_op_st16_i64:
1800 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1801 break;
1802 case INDEX_op_st_i32:
1803 case INDEX_op_st32_i64:
1804 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1805 break;
1806 case INDEX_op_st_i64:
1807 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1808 break;
1809
1810 case INDEX_op_add_i32:
1811 a2 = (int32_t)a2;
1812 /* FALLTHRU */
1813 case INDEX_op_add_i64:
1814 if (c2) {
1815 tcg_out_addsubi(s, ext, a0, a1, a2);
1816 } else {
1817 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1818 }
1819 break;
1820
1821 case INDEX_op_sub_i32:
1822 a2 = (int32_t)a2;
1823 /* FALLTHRU */
1824 case INDEX_op_sub_i64:
1825 if (c2) {
1826 tcg_out_addsubi(s, ext, a0, a1, -a2);
1827 } else {
1828 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1829 }
1830 break;
1831
1832 case INDEX_op_neg_i64:
1833 case INDEX_op_neg_i32:
1834 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1835 break;
1836
1837 case INDEX_op_and_i32:
1838 a2 = (int32_t)a2;
1839 /* FALLTHRU */
1840 case INDEX_op_and_i64:
1841 if (c2) {
1842 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1843 } else {
1844 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1845 }
1846 break;
1847
1848 case INDEX_op_andc_i32:
1849 a2 = (int32_t)a2;
1850 /* FALLTHRU */
1851 case INDEX_op_andc_i64:
1852 if (c2) {
1853 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1854 } else {
1855 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1856 }
1857 break;
1858
1859 case INDEX_op_or_i32:
1860 a2 = (int32_t)a2;
1861 /* FALLTHRU */
1862 case INDEX_op_or_i64:
1863 if (c2) {
1864 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1865 } else {
1866 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1867 }
1868 break;
1869
1870 case INDEX_op_orc_i32:
1871 a2 = (int32_t)a2;
1872 /* FALLTHRU */
1873 case INDEX_op_orc_i64:
1874 if (c2) {
1875 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1876 } else {
1877 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1878 }
1879 break;
1880
1881 case INDEX_op_xor_i32:
1882 a2 = (int32_t)a2;
1883 /* FALLTHRU */
1884 case INDEX_op_xor_i64:
1885 if (c2) {
1886 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1887 } else {
1888 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1889 }
1890 break;
1891
1892 case INDEX_op_eqv_i32:
1893 a2 = (int32_t)a2;
1894 /* FALLTHRU */
1895 case INDEX_op_eqv_i64:
1896 if (c2) {
1897 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1898 } else {
1899 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1900 }
1901 break;
1902
1903 case INDEX_op_not_i64:
1904 case INDEX_op_not_i32:
1905 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1906 break;
1907
1908 case INDEX_op_mul_i64:
1909 case INDEX_op_mul_i32:
1910 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1911 break;
1912
1913 case INDEX_op_div_i64:
1914 case INDEX_op_div_i32:
1915 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1916 break;
1917 case INDEX_op_divu_i64:
1918 case INDEX_op_divu_i32:
1919 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1920 break;
1921
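/* There is no remainder instruction; compute a1 - (a1 / a2) * a2 with
   a divide followed by MSUB. */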
1922 case INDEX_op_rem_i64:
1923 case INDEX_op_rem_i32:
1924 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1925 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1926 break;
1927 case INDEX_op_remu_i64:
1928 case INDEX_op_remu_i32:
1929 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1930 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1931 break;
1932
1933 case INDEX_op_shl_i64:
1934 case INDEX_op_shl_i32:
1935 if (c2) {
1936 tcg_out_shl(s, ext, a0, a1, a2);
1937 } else {
1938 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1939 }
1940 break;
1941
1942 case INDEX_op_shr_i64:
1943 case INDEX_op_shr_i32:
1944 if (c2) {
1945 tcg_out_shr(s, ext, a0, a1, a2);
1946 } else {
1947 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1948 }
1949 break;
1950
1951 case INDEX_op_sar_i64:
1952 case INDEX_op_sar_i32:
1953 if (c2) {
1954 tcg_out_sar(s, ext, a0, a1, a2);
1955 } else {
1956 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1957 }
1958 break;
1959
1960 case INDEX_op_rotr_i64:
1961 case INDEX_op_rotr_i32:
1962 if (c2) {
1963 tcg_out_rotr(s, ext, a0, a1, a2);
1964 } else {
1965 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1966 }
1967 break;
1968
1969 case INDEX_op_rotl_i64:
1970 case INDEX_op_rotl_i32:
1971 if (c2) {
1972 tcg_out_rotl(s, ext, a0, a1, a2);
1973 } else {
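/* There is no ROL instruction; rotating left by a2 is rotating right
   by -a2 modulo the width, so negate the count and use RORV. */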
1974 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1975 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1976 }
1977 break;
1978
1979 case INDEX_op_clz_i64:
1980 case INDEX_op_clz_i32:
1981 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1982 break;
1983 case INDEX_op_ctz_i64:
1984 case INDEX_op_ctz_i32:
1985 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1986 break;
1987
1988 case INDEX_op_brcond_i32:
1989 a1 = (int32_t)a1;
1990 /* FALLTHRU */
1991 case INDEX_op_brcond_i64:
1992 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1993 break;
1994
1995 case INDEX_op_setcond_i32:
1996 a2 = (int32_t)a2;
1997 /* FALLTHRU */
1998 case INDEX_op_setcond_i64:
1999 tcg_out_cmp(s, ext, a1, a2, c2);
2000 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
2001 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2002 TCG_REG_XZR, tcg_invert_cond(args[3]));
2003 break;
2004
2005 case INDEX_op_movcond_i32:
2006 a2 = (int32_t)a2;
2007 /* FALLTHRU */
2008 case INDEX_op_movcond_i64:
2009 tcg_out_cmp(s, ext, a1, a2, c2);
2010 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2011 break;
2012
2013 case INDEX_op_qemu_ld_i32:
2014 case INDEX_op_qemu_ld_i64:
2015 tcg_out_qemu_ld(s, a0, a1, a2, ext);
2016 break;
2017 case INDEX_op_qemu_st_i32:
2018 case INDEX_op_qemu_st_i64:
2019 tcg_out_qemu_st(s, REG0(0), a1, a2);
2020 break;
2021
2022 case INDEX_op_bswap64_i64:
2023 tcg_out_rev64(s, a0, a1);
2024 break;
2025 case INDEX_op_bswap32_i64:
2026 case INDEX_op_bswap32_i32:
2027 tcg_out_rev32(s, a0, a1);
2028 break;
2029 case INDEX_op_bswap16_i64:
2030 case INDEX_op_bswap16_i32:
2031 tcg_out_rev16(s, a0, a1);
2032 break;
2033
2034 case INDEX_op_ext8s_i64:
2035 case INDEX_op_ext8s_i32:
2036 tcg_out_sxt(s, ext, MO_8, a0, a1);
2037 break;
2038 case INDEX_op_ext16s_i64:
2039 case INDEX_op_ext16s_i32:
2040 tcg_out_sxt(s, ext, MO_16, a0, a1);
2041 break;
2042 case INDEX_op_ext_i32_i64:
2043 case INDEX_op_ext32s_i64:
2044 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2045 break;
2046 case INDEX_op_ext8u_i64:
2047 case INDEX_op_ext8u_i32:
2048 tcg_out_uxt(s, MO_8, a0, a1);
2049 break;
2050 case INDEX_op_ext16u_i64:
2051 case INDEX_op_ext16u_i32:
2052 tcg_out_uxt(s, MO_16, a0, a1);
2053 break;
2054 case INDEX_op_extu_i32_i64:
2055 case INDEX_op_ext32u_i64:
2056 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2057 break;
2058
2059 case INDEX_op_deposit_i64:
2060 case INDEX_op_deposit_i32:
2061 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2062 break;
2063
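/* Bitfield extract: UBFM/SBFM with lsb = pos and msb = pos + len - 1. */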
2064 case INDEX_op_extract_i64:
2065 case INDEX_op_extract_i32:
2066 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2067 break;
2068
2069 case INDEX_op_sextract_i64:
2070 case INDEX_op_sextract_i32:
2071 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2072 break;
2073
2074 case INDEX_op_extract2_i64:
2075 case INDEX_op_extract2_i32:
2076 tcg_out_extr(s, ext, a0, a1, a2, args[3]);
2077 break;
2078
2079 case INDEX_op_add2_i32:
2080 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2081 (int32_t)args[4], args[5], const_args[4],
2082 const_args[5], false);
2083 break;
2084 case INDEX_op_add2_i64:
2085 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2086 args[5], const_args[4], const_args[5], false);
2087 break;
2088 case INDEX_op_sub2_i32:
2089 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2090 (int32_t)args[4], args[5], const_args[4],
2091 const_args[5], true);
2092 break;
2093 case INDEX_op_sub2_i64:
2094 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2095 args[5], const_args[4], const_args[5], true);
2096 break;
2097
2098 case INDEX_op_muluh_i64:
2099 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2100 break;
2101 case INDEX_op_mulsh_i64:
2102 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2103 break;
2104
2105 case INDEX_op_mb:
2106 tcg_out_mb(s, a0);
2107 break;
2108
2109 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2110 case INDEX_op_mov_i64:
2111 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2112 case INDEX_op_movi_i64:
2113 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2114 default:
2115 g_assert_not_reached();
2116 }
2117
2118 #undef REG0
2119 }
2120
2121 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2122 unsigned vecl, unsigned vece,
2123 const TCGArg *args, const int *const_args)
2124 {
2125 static const AArch64Insn cmp_insn[16] = {
2126 [TCG_COND_EQ] = I3616_CMEQ,
2127 [TCG_COND_GT] = I3616_CMGT,
2128 [TCG_COND_GE] = I3616_CMGE,
2129 [TCG_COND_GTU] = I3616_CMHI,
2130 [TCG_COND_GEU] = I3616_CMHS,
2131 };
2132 static const AArch64Insn cmp0_insn[16] = {
2133 [TCG_COND_EQ] = I3617_CMEQ0,
2134 [TCG_COND_GT] = I3617_CMGT0,
2135 [TCG_COND_GE] = I3617_CMGE0,
2136 [TCG_COND_LT] = I3617_CMLT0,
2137 [TCG_COND_LE] = I3617_CMLE0,
2138 };
2139
2140 TCGType type = vecl + TCG_TYPE_V64;
2141 unsigned is_q = vecl;
2142 TCGArg a0, a1, a2;
2143
2144 a0 = args[0];
2145 a1 = args[1];
2146 a2 = args[2];
2147
2148 switch (opc) {
2149 case INDEX_op_ld_vec:
2150 tcg_out_ld(s, type, a0, a1, a2);
2151 break;
2152 case INDEX_op_st_vec:
2153 tcg_out_st(s, type, a0, a1, a2);
2154 break;
2155 case INDEX_op_add_vec:
2156 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2157 break;
2158 case INDEX_op_sub_vec:
2159 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2160 break;
2161 case INDEX_op_mul_vec:
2162 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2163 break;
2164 case INDEX_op_neg_vec:
2165 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2166 break;
2167 case INDEX_op_and_vec:
2168 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2169 break;
2170 case INDEX_op_or_vec:
2171 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2172 break;
2173 case INDEX_op_xor_vec:
2174 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2175 break;
2176 case INDEX_op_andc_vec:
2177 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2178 break;
2179 case INDEX_op_orc_vec:
2180 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2181 break;
2182 case INDEX_op_ssadd_vec:
2183 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2184 break;
2185 case INDEX_op_sssub_vec:
2186 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2187 break;
2188 case INDEX_op_usadd_vec:
2189 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2190 break;
2191 case INDEX_op_ussub_vec:
2192 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2193 break;
2194 case INDEX_op_smax_vec:
2195 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2196 break;
2197 case INDEX_op_smin_vec:
2198 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2199 break;
2200 case INDEX_op_umax_vec:
2201 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2202 break;
2203 case INDEX_op_umin_vec:
2204 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2205 break;
2206 case INDEX_op_not_vec:
2207 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2208 break;
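/* The AdvSIMD shift-by-immediate encodings fold the element size into
   the immh:immb field: SHL takes (8 << vece) + shift, while the right
   shifts take (16 << vece) - shift. */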
2209 case INDEX_op_shli_vec:
2210 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2211 break;
2212 case INDEX_op_shri_vec:
2213 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2214 break;
2215 case INDEX_op_sari_vec:
2216 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2217 break;
2218 case INDEX_op_cmp_vec:
2219 {
2220 TCGCond cond = args[3];
2221 AArch64Insn insn;
2222
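/* There is no CMNE. Against the constant zero, CMTST of a register
   with itself sets a lane to all ones iff it is non-zero; otherwise
   use CMEQ and invert the result. */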
2223 if (cond == TCG_COND_NE) {
2224 if (const_args[2]) {
2225 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2226 } else {
2227 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2228 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2229 }
2230 } else {
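/* Prefer the compare-against-zero forms when the constant is zero;
   otherwise materialize zero in a temp. Conditions without a direct
   encoding are handled by swapping the operands and the condition. */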
2231 if (const_args[2]) {
2232 insn = cmp0_insn[cond];
2233 if (insn) {
2234 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2235 break;
2236 }
2237 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2238 a2 = TCG_VEC_TMP;
2239 }
2240 insn = cmp_insn[cond];
2241 if (insn == 0) {
2242 TCGArg t;
2243 t = a1, a1 = a2, a2 = t;
2244 cond = tcg_swap_cond(cond);
2245 insn = cmp_insn[cond];
2246 tcg_debug_assert(insn != 0);
2247 }
2248 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2249 }
2250 }
2251 break;
2252
2253 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2254 case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
2255 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2256 default:
2257 g_assert_not_reached();
2258 }
2259 }
2260
2261 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2262 {
2263 switch (opc) {
2264 case INDEX_op_add_vec:
2265 case INDEX_op_sub_vec:
2266 case INDEX_op_and_vec:
2267 case INDEX_op_or_vec:
2268 case INDEX_op_xor_vec:
2269 case INDEX_op_andc_vec:
2270 case INDEX_op_orc_vec:
2271 case INDEX_op_neg_vec:
2272 case INDEX_op_not_vec:
2273 case INDEX_op_cmp_vec:
2274 case INDEX_op_shli_vec:
2275 case INDEX_op_shri_vec:
2276 case INDEX_op_sari_vec:
2277 case INDEX_op_ssadd_vec:
2278 case INDEX_op_sssub_vec:
2279 case INDEX_op_usadd_vec:
2280 case INDEX_op_ussub_vec:
2281 case INDEX_op_smax_vec:
2282 case INDEX_op_smin_vec:
2283 case INDEX_op_umax_vec:
2284 case INDEX_op_umin_vec:
2285 return 1;
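/* The vector MUL instruction does not support 64-bit elements. */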
2286 case INDEX_op_mul_vec:
2287 return vece < MO_64;
2288
2289 default:
2290 return 0;
2291 }
2292 }
2293
2294 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2295 TCGArg a0, ...)
2296 {
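/* Every vector opcode accepted by tcg_can_emit_vec_op above maps
   directly to an instruction, so there is nothing to expand here. */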
2297 }
2298
2299 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2300 {
2301 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2302 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2303 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2304 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2305 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2306 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2307 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2308 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2309 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2310 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2311 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2312 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2313 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2314 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2315 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2316 static const TCGTargetOpDef r_r_rAL
2317 = { .args_ct_str = { "r", "r", "rAL" } };
2318 static const TCGTargetOpDef dep
2319 = { .args_ct_str = { "r", "0", "rZ" } };
2320 static const TCGTargetOpDef ext2
2321 = { .args_ct_str = { "r", "rZ", "rZ" } };
2322 static const TCGTargetOpDef movc
2323 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2324 static const TCGTargetOpDef add2
2325 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2326
2327 switch (op) {
2328 case INDEX_op_goto_ptr:
2329 return &r;
2330
2331 case INDEX_op_ld8u_i32:
2332 case INDEX_op_ld8s_i32:
2333 case INDEX_op_ld16u_i32:
2334 case INDEX_op_ld16s_i32:
2335 case INDEX_op_ld_i32:
2336 case INDEX_op_ld8u_i64:
2337 case INDEX_op_ld8s_i64:
2338 case INDEX_op_ld16u_i64:
2339 case INDEX_op_ld16s_i64:
2340 case INDEX_op_ld32u_i64:
2341 case INDEX_op_ld32s_i64:
2342 case INDEX_op_ld_i64:
2343 case INDEX_op_neg_i32:
2344 case INDEX_op_neg_i64:
2345 case INDEX_op_not_i32:
2346 case INDEX_op_not_i64:
2347 case INDEX_op_bswap16_i32:
2348 case INDEX_op_bswap32_i32:
2349 case INDEX_op_bswap16_i64:
2350 case INDEX_op_bswap32_i64:
2351 case INDEX_op_bswap64_i64:
2352 case INDEX_op_ext8s_i32:
2353 case INDEX_op_ext16s_i32:
2354 case INDEX_op_ext8u_i32:
2355 case INDEX_op_ext16u_i32:
2356 case INDEX_op_ext8s_i64:
2357 case INDEX_op_ext16s_i64:
2358 case INDEX_op_ext32s_i64:
2359 case INDEX_op_ext8u_i64:
2360 case INDEX_op_ext16u_i64:
2361 case INDEX_op_ext32u_i64:
2362 case INDEX_op_ext_i32_i64:
2363 case INDEX_op_extu_i32_i64:
2364 case INDEX_op_extract_i32:
2365 case INDEX_op_extract_i64:
2366 case INDEX_op_sextract_i32:
2367 case INDEX_op_sextract_i64:
2368 return &r_r;
2369
2370 case INDEX_op_st8_i32:
2371 case INDEX_op_st16_i32:
2372 case INDEX_op_st_i32:
2373 case INDEX_op_st8_i64:
2374 case INDEX_op_st16_i64:
2375 case INDEX_op_st32_i64:
2376 case INDEX_op_st_i64:
2377 return &rZ_r;
2378
2379 case INDEX_op_add_i32:
2380 case INDEX_op_add_i64:
2381 case INDEX_op_sub_i32:
2382 case INDEX_op_sub_i64:
2383 case INDEX_op_setcond_i32:
2384 case INDEX_op_setcond_i64:
2385 return &r_r_rA;
2386
2387 case INDEX_op_mul_i32:
2388 case INDEX_op_mul_i64:
2389 case INDEX_op_div_i32:
2390 case INDEX_op_div_i64:
2391 case INDEX_op_divu_i32:
2392 case INDEX_op_divu_i64:
2393 case INDEX_op_rem_i32:
2394 case INDEX_op_rem_i64:
2395 case INDEX_op_remu_i32:
2396 case INDEX_op_remu_i64:
2397 case INDEX_op_muluh_i64:
2398 case INDEX_op_mulsh_i64:
2399 return &r_r_r;
2400
2401 case INDEX_op_and_i32:
2402 case INDEX_op_and_i64:
2403 case INDEX_op_or_i32:
2404 case INDEX_op_or_i64:
2405 case INDEX_op_xor_i32:
2406 case INDEX_op_xor_i64:
2407 case INDEX_op_andc_i32:
2408 case INDEX_op_andc_i64:
2409 case INDEX_op_orc_i32:
2410 case INDEX_op_orc_i64:
2411 case INDEX_op_eqv_i32:
2412 case INDEX_op_eqv_i64:
2413 return &r_r_rL;
2414
2415 case INDEX_op_shl_i32:
2416 case INDEX_op_shr_i32:
2417 case INDEX_op_sar_i32:
2418 case INDEX_op_rotl_i32:
2419 case INDEX_op_rotr_i32:
2420 case INDEX_op_shl_i64:
2421 case INDEX_op_shr_i64:
2422 case INDEX_op_sar_i64:
2423 case INDEX_op_rotl_i64:
2424 case INDEX_op_rotr_i64:
2425 return &r_r_ri;
2426
2427 case INDEX_op_clz_i32:
2428 case INDEX_op_ctz_i32:
2429 case INDEX_op_clz_i64:
2430 case INDEX_op_ctz_i64:
2431 return &r_r_rAL;
2432
2433 case INDEX_op_brcond_i32:
2434 case INDEX_op_brcond_i64:
2435 return &r_rA;
2436
2437 case INDEX_op_movcond_i32:
2438 case INDEX_op_movcond_i64:
2439 return &movc;
2440
2441 case INDEX_op_qemu_ld_i32:
2442 case INDEX_op_qemu_ld_i64:
2443 return &r_l;
2444 case INDEX_op_qemu_st_i32:
2445 case INDEX_op_qemu_st_i64:
2446 return &lZ_l;
2447
2448 case INDEX_op_deposit_i32:
2449 case INDEX_op_deposit_i64:
2450 return &dep;
2451
2452 case INDEX_op_extract2_i32:
2453 case INDEX_op_extract2_i64:
2454 return &ext2;
2455
2456 case INDEX_op_add2_i32:
2457 case INDEX_op_add2_i64:
2458 case INDEX_op_sub2_i32:
2459 case INDEX_op_sub2_i64:
2460 return &add2;
2461
2462 case INDEX_op_add_vec:
2463 case INDEX_op_sub_vec:
2464 case INDEX_op_mul_vec:
2465 case INDEX_op_and_vec:
2466 case INDEX_op_or_vec:
2467 case INDEX_op_xor_vec:
2468 case INDEX_op_andc_vec:
2469 case INDEX_op_orc_vec:
2470 case INDEX_op_ssadd_vec:
2471 case INDEX_op_sssub_vec:
2472 case INDEX_op_usadd_vec:
2473 case INDEX_op_ussub_vec:
2474 case INDEX_op_smax_vec:
2475 case INDEX_op_smin_vec:
2476 case INDEX_op_umax_vec:
2477 case INDEX_op_umin_vec:
2478 return &w_w_w;
2479 case INDEX_op_not_vec:
2480 case INDEX_op_neg_vec:
2481 case INDEX_op_shli_vec:
2482 case INDEX_op_shri_vec:
2483 case INDEX_op_sari_vec:
2484 return &w_w;
2485 case INDEX_op_ld_vec:
2486 case INDEX_op_st_vec:
2487 return &w_r;
2488 case INDEX_op_dup_vec:
2489 return &w_wr;
2490 case INDEX_op_cmp_vec:
2491 return &w_w_wZ;
2492
2493 default:
2494 return NULL;
2495 }
2496 }
2497
2498 static void tcg_target_init(TCGContext *s)
2499 {
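/* Register numbers 0-31 are the general registers, 32-63 the vector
   registers; only the latter may hold V64/V128 values. */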
2500 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2501 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2502 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2503 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2504
2505 tcg_target_call_clobber_regs = -1ull;
2506 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2507 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2508 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2509 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2510 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2511 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2512 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2513 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2514 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2515 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2516 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2517 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2518 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2519 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2520 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2521 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2522 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2523 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2524 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2525
2526 s->reserved_regs = 0;
2527 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2528 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2529 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2530 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2531 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2532 }
2533
2534 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2535 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2536
2537 #define FRAME_SIZE \
2538 ((PUSH_SIZE \
2539 + TCG_STATIC_CALL_ARGS_SIZE \
2540 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2541 + TCG_TARGET_STACK_ALIGN - 1) \
2542 & ~(TCG_TARGET_STACK_ALIGN - 1))
2543
2544 /* We're expecting a 2-byte uleb128 encoded value. */
2545 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2546
2547 /* We're expecting to use a single ADDI insn. */
2548 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2549
2550 static void tcg_target_qemu_prologue(TCGContext *s)
2551 {
2552 TCGReg r;
2553
2554 /* Push (FP, LR) and allocate space for all saved registers. */
2555 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2556 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2557
2558 /* Set up frame pointer for canonical unwinding. */
2559 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2560
2561 /* Store callee-preserved regs x19..x28. */
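/* The (FP, LR) pair occupies the first 16 bytes of the frame,
   so the x19..x28 pairs start at offset 16. */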
2562 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2563 int ofs = (r - TCG_REG_X19 + 2) * 8;
2564 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2565 }
2566
2567 /* Make stack space for TCG locals. */
2568 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2569 FRAME_SIZE - PUSH_SIZE);
2570
2571 /* Inform TCG about how to find TCG locals with register, offset, size. */
2572 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2573 CPU_TEMP_BUF_NLONGS * sizeof(long));
2574
2575 #if !defined(CONFIG_SOFTMMU)
2576 if (USE_GUEST_BASE) {
2577 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2578 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2579 }
2580 #endif
2581
2582 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2583 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2584
2585 /*
2586 * Return path for goto_ptr. Set the return value to 0, as exit_tb does,
2587 * and fall through to the rest of the epilogue.
2588 */
2589 s->code_gen_epilogue = s->code_ptr;
2590 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2591
2592 /* TB epilogue */
2593 tb_ret_addr = s->code_ptr;
2594
2595 /* Remove TCG locals stack space. */
2596 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2597 FRAME_SIZE - PUSH_SIZE);
2598
2599 /* Restore registers x19..x28. */
2600 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2601 int ofs = (r - TCG_REG_X19 + 2) * 8;
2602 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2603 }
2604
2605 /* Pop (FP, LR), restore SP to previous frame. */
2606 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2607 TCG_REG_SP, PUSH_SIZE, 0, 1);
2608 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2609 }
2610
2611 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2612 {
2613 int i;
2614 for (i = 0; i < count; ++i) {
2615 p[i] = NOP;
2616 }
2617 }
2618
2619 typedef struct {
2620 DebugFrameHeader h;
2621 uint8_t fde_def_cfa[4];
2622 uint8_t fde_reg_ofs[24];
2623 } DebugFrame;
2624
2625 #define ELF_HOST_MACHINE EM_AARCH64
2626
2627 static const DebugFrame debug_frame = {
2628 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2629 .h.cie.id = -1,
2630 .h.cie.version = 1,
2631 .h.cie.code_align = 1,
2632 .h.cie.data_align = 0x78, /* sleb128 -8 */
2633 .h.cie.return_column = TCG_REG_LR,
2634
2635 /* Total FDE size does not include the "len" member. */
2636 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2637
2638 .fde_def_cfa = {
2639 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2640 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2641 (FRAME_SIZE >> 7)
2642 },
2643 .fde_reg_ofs = {
2644 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2645 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2646 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2647 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2648 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2649 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2650 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2651 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2652 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2653 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2654 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2655 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2656 }
2657 };
2658
2659 void tcg_register_jit(void *buf, size_t buf_size)
2660 {
2661 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2662 }