tcg/aarch64: Implement vector minmax arithmetic
[mirror_qemu.git] / tcg / aarch64 / tcg-target.inc.c
1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-pool.inc.c"
14 #include "qemu/bitops.h"
15
16 /* We're going to reuse TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifdef CONFIG_DEBUG_TCG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26 "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31 "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32 };
33 #endif /* CONFIG_DEBUG_TCG */
34
35 static const int tcg_target_reg_alloc_order[] = {
36 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38 TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42 TCG_REG_X16, TCG_REG_X17,
43
44 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47 /* X18 reserved by system */
48 /* X19 reserved for AREG0 */
49 /* X29 reserved as fp */
50 /* X30 reserved as temporary */
51
52 TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53 TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54 /* V8 - V15 are call-saved, and skipped. */
55 TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56 TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57 TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58 TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59 };
60
61 static const int tcg_target_call_iarg_regs[8] = {
62 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64 };
65 static const int tcg_target_call_oarg_regs[1] = {
66 TCG_REG_X0
67 };
68
69 #define TCG_REG_TMP TCG_REG_X30
70 #define TCG_VEC_TMP TCG_REG_V31
71
72 #ifndef CONFIG_SOFTMMU
73 /* Note that XZR cannot be encoded in the address base register slot,
74 as that actually encodes SP. So if we need to zero-extend the guest
75 address, via the address index register slot, we need to load even
76 a zero guest base into a register. */
77 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
78 #define TCG_REG_GUEST_BASE TCG_REG_X28
79 #endif
80
81 static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82 {
83 ptrdiff_t offset = target - code_ptr;
84 if (offset == sextract64(offset, 0, 26)) {
85 /* read instruction, mask away previous PC_REL26 parameter contents,
86 set the proper offset, then write back the instruction. */
87 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88 return true;
89 }
90 return false;
91 }
92
93 static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94 {
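/* As for reloc_pc26, but the 19-bit displacement used by conditional
   branches, CBZ/CBNZ and LDR (literal) lives in bits [23:5]. */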
95 ptrdiff_t offset = target - code_ptr;
96 if (offset == sextract64(offset, 0, 19)) {
97 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98 return true;
99 }
100 return false;
101 }
102
103 static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
105 {
106 tcg_debug_assert(addend == 0);
107 switch (type) {
108 case R_AARCH64_JUMP26:
109 case R_AARCH64_CALL26:
110 return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111 case R_AARCH64_CONDBR19:
112 return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113 default:
114 g_assert_not_reached();
115 }
116 }
117
118 #define TCG_CT_CONST_AIMM 0x100
119 #define TCG_CT_CONST_LIMM 0x200
120 #define TCG_CT_CONST_ZERO 0x400
121 #define TCG_CT_CONST_MONE 0x800
122
123 /* parse target specific constraints */
124 static const char *target_parse_constraint(TCGArgConstraint *ct,
125 const char *ct_str, TCGType type)
126 {
127 switch (*ct_str++) {
128 case 'r': /* general registers */
129 ct->ct |= TCG_CT_REG;
130 ct->u.regs |= 0xffffffffu;
131 break;
132 case 'w': /* advsimd registers */
133 ct->ct |= TCG_CT_REG;
134 ct->u.regs |= 0xffffffff00000000ull;
135 break;
136 case 'l': /* qemu_ld / qemu_st address, data_reg */
137 ct->ct |= TCG_CT_REG;
138 ct->u.regs = 0xffffffffu;
139 #ifdef CONFIG_SOFTMMU
140 /* x0 and x1 will be overwritten when reading the tlb entry,
141 and x2 and x3 are used for the helper arguments; better to avoid them. */
142 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
143 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
144 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
145 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
146 #endif
147 break;
148 case 'A': /* Valid for arithmetic immediate (positive or negative). */
149 ct->ct |= TCG_CT_CONST_AIMM;
150 break;
151 case 'L': /* Valid for logical immediate. */
152 ct->ct |= TCG_CT_CONST_LIMM;
153 break;
154 case 'M': /* minus one */
155 ct->ct |= TCG_CT_CONST_MONE;
156 break;
157 case 'Z': /* zero */
158 ct->ct |= TCG_CT_CONST_ZERO;
159 break;
160 default:
161 return NULL;
162 }
163 return ct_str;
164 }
165
166 /* Match a constant valid for addition (12-bit, optionally shifted). */
167 static inline bool is_aimm(uint64_t val)
168 {
169 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
170 }
171
172 /* Match a constant valid for logical operations. */
173 static inline bool is_limm(uint64_t val)
174 {
175 /* Taking a simplified view of the logical immediates for now, ignoring
176 the replication that can happen across the field. Match bit patterns
177 of the forms
178 0....01....1
179 0..01..10..0
180 and their inverses. */
181
182 /* Make things easier below, by testing the form with msb clear. */
183 if ((int64_t)val < 0) {
184 val = ~val;
185 }
186 if (val == 0) {
187 return false;
188 }
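/* If the set bits form one contiguous run, adding the lowest set bit
   carries through the whole run and leaves a single bit, e.g.
   0x0ff0 + 0x0010 = 0x1000; any other pattern leaves at least two
   bits set, which the power-of-two test below rejects. */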
189 val += val & -val;
190 return (val & (val - 1)) == 0;
191 }
192
193 /* Match a constant that is valid for vectors. */
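/* On success, *OP, *CMODE and *IMM8 are the fields of the AdvSIMD
   modified-immediate encoding (MOVI and friends) that reproduce V64
   when expanded. */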
194 static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
195 {
196 int i;
197
198 *op = 0;
199 /* Match replication across 8 bits. */
200 if (v64 == dup_const(MO_8, v64)) {
201 *cmode = 0xe;
202 *imm8 = v64 & 0xff;
203 return true;
204 }
205 /* Match replication across 16 bits. */
206 if (v64 == dup_const(MO_16, v64)) {
207 uint16_t v16 = v64;
208
209 if (v16 == (v16 & 0xff)) {
210 *cmode = 0x8;
211 *imm8 = v16 & 0xff;
212 return true;
213 } else if (v16 == (v16 & 0xff00)) {
214 *cmode = 0xa;
215 *imm8 = v16 >> 8;
216 return true;
217 }
218 }
219 /* Match replication across 32 bits. */
220 if (v64 == dup_const(MO_32, v64)) {
221 uint32_t v32 = v64;
222
223 if (v32 == (v32 & 0xff)) {
224 *cmode = 0x0;
225 *imm8 = v32 & 0xff;
226 return true;
227 } else if (v32 == (v32 & 0xff00)) {
228 *cmode = 0x2;
229 *imm8 = (v32 >> 8) & 0xff;
230 return true;
231 } else if (v32 == (v32 & 0xff0000)) {
232 *cmode = 0x4;
233 *imm8 = (v32 >> 16) & 0xff;
234 return true;
235 } else if (v32 == (v32 & 0xff000000)) {
236 *cmode = 0x6;
237 *imm8 = v32 >> 24;
238 return true;
239 } else if ((v32 & 0xffff00ff) == 0xff) {
240 *cmode = 0xc;
241 *imm8 = (v32 >> 8) & 0xff;
242 return true;
243 } else if ((v32 & 0xff00ffff) == 0xffff) {
244 *cmode = 0xd;
245 *imm8 = (v32 >> 16) & 0xff;
246 return true;
247 }
248 /* Match forms of a float32. */
249 if (extract32(v32, 0, 19) == 0
250 && (extract32(v32, 25, 6) == 0x20
251 || extract32(v32, 25, 6) == 0x1f)) {
252 *cmode = 0xf;
253 *imm8 = (extract32(v32, 31, 1) << 7)
254 | (extract32(v32, 25, 1) << 6)
255 | extract32(v32, 19, 6);
256 return true;
257 }
258 }
259 /* Match forms of a float64. */
260 if (extract64(v64, 0, 48) == 0
261 && (extract64(v64, 54, 9) == 0x100
262 || extract64(v64, 54, 9) == 0x0ff)) {
263 *cmode = 0xf;
264 *op = 1;
265 *imm8 = (extract64(v64, 63, 1) << 7)
266 | (extract64(v64, 54, 1) << 6)
267 | extract64(v64, 48, 6);
268 return true;
269 }
270 /* Match bytes of 0x00 and 0xff. */
271 for (i = 0; i < 64; i += 8) {
272 uint64_t byte = extract64(v64, i, 8);
273 if (byte != 0 && byte != 0xff) {
274 break;
275 }
276 }
277 if (i == 64) {
278 *cmode = 0xe;
279 *op = 1;
280 *imm8 = (extract64(v64, 0, 1) << 0)
281 | (extract64(v64, 8, 1) << 1)
282 | (extract64(v64, 16, 1) << 2)
283 | (extract64(v64, 24, 1) << 3)
284 | (extract64(v64, 32, 1) << 4)
285 | (extract64(v64, 40, 1) << 5)
286 | (extract64(v64, 48, 1) << 6)
287 | (extract64(v64, 56, 1) << 7);
288 return true;
289 }
290 return false;
291 }
292
293 static int tcg_target_const_match(tcg_target_long val, TCGType type,
294 const TCGArgConstraint *arg_ct)
295 {
296 int ct = arg_ct->ct;
297
298 if (ct & TCG_CT_CONST) {
299 return 1;
300 }
301 if (type == TCG_TYPE_I32) {
302 val = (int32_t)val;
303 }
304 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
305 return 1;
306 }
307 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
308 return 1;
309 }
310 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
311 return 1;
312 }
313 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
314 return 1;
315 }
316
317 return 0;
318 }
319
320 enum aarch64_cond_code {
321 COND_EQ = 0x0,
322 COND_NE = 0x1,
323 COND_CS = 0x2, /* Unsigned greater or equal */
324 COND_HS = COND_CS, /* ALIAS greater or equal */
325 COND_CC = 0x3, /* Unsigned less than */
326 COND_LO = COND_CC, /* ALIAS Lower */
327 COND_MI = 0x4, /* Negative */
328 COND_PL = 0x5, /* Zero or greater */
329 COND_VS = 0x6, /* Overflow */
330 COND_VC = 0x7, /* No overflow */
331 COND_HI = 0x8, /* Unsigned greater than */
332 COND_LS = 0x9, /* Unsigned less or equal */
333 COND_GE = 0xa,
334 COND_LT = 0xb,
335 COND_GT = 0xc,
336 COND_LE = 0xd,
337 COND_AL = 0xe,
338 COND_NV = 0xf, /* behaves like COND_AL here */
339 };
340
341 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
342 [TCG_COND_EQ] = COND_EQ,
343 [TCG_COND_NE] = COND_NE,
344 [TCG_COND_LT] = COND_LT,
345 [TCG_COND_GE] = COND_GE,
346 [TCG_COND_LE] = COND_LE,
347 [TCG_COND_GT] = COND_GT,
348 /* unsigned */
349 [TCG_COND_LTU] = COND_LO,
350 [TCG_COND_GTU] = COND_HI,
351 [TCG_COND_GEU] = COND_HS,
352 [TCG_COND_LEU] = COND_LS,
353 };
354
355 typedef enum {
356 LDST_ST = 0, /* store */
357 LDST_LD = 1, /* load */
358 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
359 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
360 } AArch64LdstType;
361
362 /* We encode the format of the insn into the beginning of the name, so that
363 we can have the preprocessor help "typecheck" the insn vs the output
364 function. Arm didn't provide us with nice names for the formats, so we
365 use the section number of the architecture reference manual in which the
366 instruction group is described. */
367 typedef enum {
368 /* Compare and branch (immediate). */
369 I3201_CBZ = 0x34000000,
370 I3201_CBNZ = 0x35000000,
371
372 /* Conditional branch (immediate). */
373 I3202_B_C = 0x54000000,
374
375 /* Unconditional branch (immediate). */
376 I3206_B = 0x14000000,
377 I3206_BL = 0x94000000,
378
379 /* Unconditional branch (register). */
380 I3207_BR = 0xd61f0000,
381 I3207_BLR = 0xd63f0000,
382 I3207_RET = 0xd65f0000,
383
384 /* Load literal for loading the address at pc-relative offset */
385 I3305_LDR = 0x58000000,
386 I3305_LDR_v64 = 0x5c000000,
387 I3305_LDR_v128 = 0x9c000000,
388
389 /* Load/store register. Described here as 3.3.12, but the helper
390 that emits them can transform to 3.3.10 or 3.3.13. */
391 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
392 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
393 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
394 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
395
396 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
397 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
398 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
399 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
400
401 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
402 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
403
404 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
405 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
406 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
407
408 I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
409 I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
410
411 I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
412 I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
413
414 I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
415 I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
416
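/* Bits OR'd into the unscaled-immediate form (3.3.12) to produce the
   register-offset form (3.3.10) or the scaled unsigned-immediate
   form (3.3.13). */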
417 I3312_TO_I3310 = 0x00200800,
418 I3312_TO_I3313 = 0x01000000,
419
420 /* Load/store register pair instructions. */
421 I3314_LDP = 0x28400000,
422 I3314_STP = 0x28000000,
423
424 /* Add/subtract immediate instructions. */
425 I3401_ADDI = 0x11000000,
426 I3401_ADDSI = 0x31000000,
427 I3401_SUBI = 0x51000000,
428 I3401_SUBSI = 0x71000000,
429
430 /* Bitfield instructions. */
431 I3402_BFM = 0x33000000,
432 I3402_SBFM = 0x13000000,
433 I3402_UBFM = 0x53000000,
434
435 /* Extract instruction. */
436 I3403_EXTR = 0x13800000,
437
438 /* Logical immediate instructions. */
439 I3404_ANDI = 0x12000000,
440 I3404_ORRI = 0x32000000,
441 I3404_EORI = 0x52000000,
442
443 /* Move wide immediate instructions. */
444 I3405_MOVN = 0x12800000,
445 I3405_MOVZ = 0x52800000,
446 I3405_MOVK = 0x72800000,
447
448 /* PC relative addressing instructions. */
449 I3406_ADR = 0x10000000,
450 I3406_ADRP = 0x90000000,
451
452 /* Add/subtract shifted register instructions (without a shift). */
453 I3502_ADD = 0x0b000000,
454 I3502_ADDS = 0x2b000000,
455 I3502_SUB = 0x4b000000,
456 I3502_SUBS = 0x6b000000,
457
458 /* Add/subtract shifted register instructions (with a shift). */
459 I3502S_ADD_LSL = I3502_ADD,
460
461 /* Add/subtract with carry instructions. */
462 I3503_ADC = 0x1a000000,
463 I3503_SBC = 0x5a000000,
464
465 /* Conditional select instructions. */
466 I3506_CSEL = 0x1a800000,
467 I3506_CSINC = 0x1a800400,
468 I3506_CSINV = 0x5a800000,
469 I3506_CSNEG = 0x5a800400,
470
471 /* Data-processing (1 source) instructions. */
472 I3507_CLZ = 0x5ac01000,
473 I3507_RBIT = 0x5ac00000,
474 I3507_REV16 = 0x5ac00400,
475 I3507_REV32 = 0x5ac00800,
476 I3507_REV64 = 0x5ac00c00,
477
478 /* Data-processing (2 source) instructions. */
479 I3508_LSLV = 0x1ac02000,
480 I3508_LSRV = 0x1ac02400,
481 I3508_ASRV = 0x1ac02800,
482 I3508_RORV = 0x1ac02c00,
483 I3508_SMULH = 0x9b407c00,
484 I3508_UMULH = 0x9bc07c00,
485 I3508_UDIV = 0x1ac00800,
486 I3508_SDIV = 0x1ac00c00,
487
488 /* Data-processing (3 source) instructions. */
489 I3509_MADD = 0x1b000000,
490 I3509_MSUB = 0x1b008000,
491
492 /* Logical shifted register instructions (without a shift). */
493 I3510_AND = 0x0a000000,
494 I3510_BIC = 0x0a200000,
495 I3510_ORR = 0x2a000000,
496 I3510_ORN = 0x2a200000,
497 I3510_EOR = 0x4a000000,
498 I3510_EON = 0x4a200000,
499 I3510_ANDS = 0x6a000000,
500
501 /* AdvSIMD copy */
502 I3605_DUP = 0x0e000400,
503 I3605_INS = 0x4e001c00,
504 I3605_UMOV = 0x0e003c00,
505
506 /* AdvSIMD modified immediate */
507 I3606_MOVI = 0x0f000400,
508
509 /* AdvSIMD shift by immediate */
510 I3614_SSHR = 0x0f000400,
511 I3614_SSRA = 0x0f001400,
512 I3614_SHL = 0x0f005400,
513 I3614_USHR = 0x2f000400,
514 I3614_USRA = 0x2f001400,
515
516 /* AdvSIMD three same. */
517 I3616_ADD = 0x0e208400,
518 I3616_AND = 0x0e201c00,
519 I3616_BIC = 0x0e601c00,
520 I3616_EOR = 0x2e201c00,
521 I3616_MUL = 0x0e209c00,
522 I3616_ORR = 0x0ea01c00,
523 I3616_ORN = 0x0ee01c00,
524 I3616_SUB = 0x2e208400,
525 I3616_CMGT = 0x0e203400,
526 I3616_CMGE = 0x0e203c00,
527 I3616_CMTST = 0x0e208c00,
528 I3616_CMHI = 0x2e203400,
529 I3616_CMHS = 0x2e203c00,
530 I3616_CMEQ = 0x2e208c00,
531 I3616_SMAX = 0x0e206400,
532 I3616_SMIN = 0x0e206c00,
533 I3616_SQADD = 0x0e200c00,
534 I3616_SQSUB = 0x0e202c00,
535 I3616_UMAX = 0x2e206400,
536 I3616_UMIN = 0x2e206c00,
537 I3616_UQADD = 0x2e200c00,
538 I3616_UQSUB = 0x2e202c00,
539
540 /* AdvSIMD two-reg misc. */
541 I3617_CMGT0 = 0x0e208800,
542 I3617_CMEQ0 = 0x0e209800,
543 I3617_CMLT0 = 0x0e20a800,
544 I3617_CMGE0 = 0x2e208800,
545 I3617_CMLE0 = 0x2e20a800,
546 I3617_NOT = 0x2e205800,
547 I3617_NEG = 0x2e20b800,
548
549 /* System instructions. */
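/* DMB_ISH selects the inner-shareable domain with the access-type
   bits clear; OR in DMB_LD and/or DMB_ST to form ISHLD, ISHST, or
   the full ISH barrier. */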
550 NOP = 0xd503201f,
551 DMB_ISH = 0xd50338bf,
552 DMB_LD = 0x00000100,
553 DMB_ST = 0x00000200,
554 } AArch64Insn;
555
556 static inline uint32_t tcg_in32(TCGContext *s)
557 {
558 uint32_t v = *(uint32_t *)s->code_ptr;
559 return v;
560 }
561
562 /* Emit an opcode with "type-checking" of the format. */
563 #define tcg_out_insn(S, FMT, OP, ...) \
564 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
565
566 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
567 {
568 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
569 }
570
571 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
572 TCGReg rt, int imm19)
573 {
574 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
575 }
576
577 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
578 TCGCond c, int imm19)
579 {
580 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
581 }
582
583 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
584 {
585 tcg_out32(s, insn | (imm26 & 0x03ffffff));
586 }
587
588 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
589 {
590 tcg_out32(s, insn | rn << 5);
591 }
592
593 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
594 TCGReg r1, TCGReg r2, TCGReg rn,
595 tcg_target_long ofs, bool pre, bool w)
596 {
597 insn |= 1u << 31; /* ext */
598 insn |= pre << 24;
599 insn |= w << 23;
600
601 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
602 insn |= (ofs & (0x7f << 3)) << (15 - 3);
603
604 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
605 }
606
607 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
608 TCGReg rd, TCGReg rn, uint64_t aimm)
609 {
610 if (aimm > 0xfff) {
611 tcg_debug_assert((aimm & 0xfff) == 0);
612 aimm >>= 12;
613 tcg_debug_assert(aimm <= 0xfff);
614 aimm |= 1 << 12; /* apply LSL 12 */
615 }
616 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
617 }
618
619 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
620 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
621 that feed the DecodeBitMasks pseudo function. */
622 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
623 TCGReg rd, TCGReg rn, int n, int immr, int imms)
624 {
625 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
626 | rn << 5 | rd);
627 }
628
629 #define tcg_out_insn_3404 tcg_out_insn_3402
630
631 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
632 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
633 {
634 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
635 | rn << 5 | rd);
636 }
637
638 /* This function is used for the Move (wide immediate) instruction group.
639 Note that SHIFT is a full shift count, not the 2 bit HW field. */
640 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
641 TCGReg rd, uint16_t half, unsigned shift)
642 {
643 tcg_debug_assert((shift & ~0x30) == 0);
644 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
645 }
646
647 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
648 TCGReg rd, int64_t disp)
649 {
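/* The low two bits of DISP form immlo (bits 30:29); the remaining
   bits form immhi (bits 23:5). */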
650 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
651 }
652
653 /* This function is for 3.5.2 (Add/subtract shifted register), for the
654 rare occasion when we actually want to supply a shift amount. */
655 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
656 TCGType ext, TCGReg rd, TCGReg rn,
657 TCGReg rm, int imm6)
658 {
659 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
660 }
661
662 /* This function is for 3.5.2 (Add/subtract shifted register),
663 and 3.5.10 (Logical shifted register), for the vast majority of cases
664 when we don't want to apply a shift. Thus it can also be used for
665 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
666 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
667 TCGReg rd, TCGReg rn, TCGReg rm)
668 {
669 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
670 }
671
672 #define tcg_out_insn_3503 tcg_out_insn_3502
673 #define tcg_out_insn_3508 tcg_out_insn_3502
674 #define tcg_out_insn_3510 tcg_out_insn_3502
675
676 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
677 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
678 {
679 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
680 | tcg_cond_to_aarch64[c] << 12);
681 }
682
683 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
684 TCGReg rd, TCGReg rn)
685 {
686 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
687 }
688
689 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
690 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
691 {
692 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
693 }
694
695 static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
696 TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
697 {
698 /* Note that bit 11 set means general register input. Therefore
699 we can handle both register sets with one function. */
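/* TCG numbers the vector registers 32..63, so the inverted "bank"
   bit of RN supplies bit 11 directly. */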
700 tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
701 | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
702 }
703
704 static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
705 TCGReg rd, bool op, int cmode, uint8_t imm8)
706 {
707 tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
708 | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
709 }
710
711 static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
712 TCGReg rd, TCGReg rn, unsigned immhb)
713 {
714 tcg_out32(s, insn | q << 30 | immhb << 16
715 | (rn & 0x1f) << 5 | (rd & 0x1f));
716 }
717
718 static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
719 unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
720 {
721 tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
722 | (rn & 0x1f) << 5 | (rd & 0x1f));
723 }
724
725 static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
726 unsigned size, TCGReg rd, TCGReg rn)
727 {
728 tcg_out32(s, insn | q << 30 | (size << 22)
729 | (rn & 0x1f) << 5 | (rd & 0x1f));
730 }
731
732 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
733 TCGReg rd, TCGReg base, TCGType ext,
734 TCGReg regoff)
735 {
736 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
737 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
738 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
739 }
740
741 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
742 TCGReg rd, TCGReg rn, intptr_t offset)
743 {
744 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
745 }
746
747 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
748 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
749 {
750 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
751 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
752 | rn << 5 | (rd & 0x1f));
753 }
754
755 /* Register to register move using ORR (shifted register with no shift). */
756 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
757 {
758 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
759 }
760
761 /* Register to register move using ADDI (move to/from SP). */
762 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
763 {
764 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
765 }
766
767 /* This function is used for the Logical (immediate) instruction group.
768 The value of LIMM must satisfy IS_LIMM. See the comment above about
769 only supporting simplified logical immediates. */
770 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
771 TCGReg rd, TCGReg rn, uint64_t limm)
772 {
773 unsigned h, l, r, c;
774
775 tcg_debug_assert(is_limm(limm));
776
777 h = clz64(limm);
778 l = ctz64(limm);
779 if (l == 0) {
780 r = 0; /* form 0....01....1 */
781 c = ctz64(~limm) - 1;
782 if (h == 0) {
783 r = clz64(~limm); /* form 1..10..01..1 */
784 c += r;
785 }
786 } else {
787 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
788 c = r - h - 1;
789 }
790 if (ext == TCG_TYPE_I32) {
791 r &= 31;
792 c &= 31;
793 }
794
795 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
796 }
797
798 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
799 TCGReg rd, uint64_t v64)
800 {
801 int op, cmode, imm8;
802
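/* LDR (literal) uses the same 19-bit pc-relative field as the
   conditional branches, so the constant pool entries reuse the
   R_AARCH64_CONDBR19 relocation. */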
803 if (is_fimm(v64, &op, &cmode, &imm8)) {
804 tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
805 } else if (type == TCG_TYPE_V128) {
806 new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
807 tcg_out_insn(s, 3305, LDR_v128, 0, rd);
808 } else {
809 new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
810 tcg_out_insn(s, 3305, LDR_v64, 0, rd);
811 }
812 }
813
814 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
815 tcg_target_long value)
816 {
817 tcg_target_long svalue = value;
818 tcg_target_long ivalue = ~value;
819 tcg_target_long t0, t1, t2;
820 int s0, s1;
821 AArch64Insn opc;
822
823 switch (type) {
824 case TCG_TYPE_I32:
825 case TCG_TYPE_I64:
826 tcg_debug_assert(rd < 32);
827 break;
828
829 case TCG_TYPE_V64:
830 case TCG_TYPE_V128:
831 tcg_debug_assert(rd >= 32);
832 tcg_out_dupi_vec(s, type, rd, value);
833 return;
834
835 default:
836 g_assert_not_reached();
837 }
838
839 /* For 32-bit values, discard potential garbage in value. For 64-bit
840 values within [2**31, 2**32-1], we can create smaller sequences by
841 interpreting this as a negative 32-bit number, while ensuring that
842 the high 32 bits are cleared by setting SF=0. */
843 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
844 svalue = (int32_t)value;
845 value = (uint32_t)value;
846 ivalue = (uint32_t)ivalue;
847 type = TCG_TYPE_I32;
848 }
849
850 /* Speed things up by handling the common case of small positive
851 and negative values specially. */
852 if ((value & ~0xffffull) == 0) {
853 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
854 return;
855 } else if ((ivalue & ~0xffffull) == 0) {
856 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
857 return;
858 }
859
860 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
861 use the sign-extended value. That lets us match rotated values such
862 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
863 if (is_limm(svalue)) {
864 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
865 return;
866 }
867
868 /* Look for host pointer values within 4G of the PC. This happens
869 often when loading pointers to QEMU's own data structures. */
870 if (type == TCG_TYPE_I64) {
871 tcg_target_long disp = value - (intptr_t)s->code_ptr;
872 if (disp == sextract64(disp, 0, 21)) {
873 tcg_out_insn(s, 3406, ADR, rd, disp);
874 return;
875 }
876 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
877 if (disp == sextract64(disp, 0, 21)) {
878 tcg_out_insn(s, 3406, ADRP, rd, disp);
879 if (value & 0xfff) {
880 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
881 }
882 return;
883 }
884 }
885
886 /* Would it take fewer insns to begin with MOVN? */
887 if (ctpop64(value) >= 32) {
888 t0 = ivalue;
889 opc = I3405_MOVN;
890 } else {
891 t0 = value;
892 opc = I3405_MOVZ;
893 }
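/* Find the two lowest non-zero 16-bit chunks of the chosen value;
   if clearing both leaves nothing, one MOVZ/MOVN plus at most one
   MOVK is enough. */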
894 s0 = ctz64(t0) & (63 & -16);
895 t1 = t0 & ~(0xffffUL << s0);
896 s1 = ctz64(t1) & (63 & -16);
897 t2 = t1 & ~(0xffffUL << s1);
898 if (t2 == 0) {
899 tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
900 if (t1 != 0) {
901 tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
902 }
903 return;
904 }
905
906 /* For more than 2 insns, dump it into the constant pool. */
907 new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
908 tcg_out_insn(s, 3305, LDR, 0, rd);
909 }
910
911 /* Define something more legible for general use. */
912 #define tcg_out_ldst_r tcg_out_insn_3310
913
914 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
915 TCGReg rn, intptr_t offset, int lgsize)
916 {
917 /* If the offset is naturally aligned and in range, then we can
918 use the scaled uimm12 encoding */
919 if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
920 uintptr_t scaled_uimm = offset >> lgsize;
921 if (scaled_uimm <= 0xfff) {
922 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
923 return;
924 }
925 }
926
927 /* Small signed offsets can use the unscaled encoding. */
928 if (offset >= -256 && offset < 256) {
929 tcg_out_insn_3312(s, insn, rd, rn, offset);
930 return;
931 }
932
933 /* Worst-case scenario, move offset to temp register, use reg offset. */
934 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
935 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
936 }
937
938 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
939 {
940 if (ret == arg) {
941 return;
942 }
943 switch (type) {
944 case TCG_TYPE_I32:
945 case TCG_TYPE_I64:
946 if (ret < 32 && arg < 32) {
947 tcg_out_movr(s, type, ret, arg);
948 break;
949 } else if (ret < 32) {
950 tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
951 break;
952 } else if (arg < 32) {
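/* INS Vd.S[0]/Vd.D[0], Wn/Xn: imm5 is 4 for a 32-bit element and 8
   for a 64-bit one, hence 4 << type given TCG_TYPE_I32/I64 == 0/1. */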
953 tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
954 break;
955 }
956 /* FALLTHRU */
957
958 case TCG_TYPE_V64:
959 tcg_debug_assert(ret >= 32 && arg >= 32);
960 tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
961 break;
962 case TCG_TYPE_V128:
963 tcg_debug_assert(ret >= 32 && arg >= 32);
964 tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
965 break;
966
967 default:
968 g_assert_not_reached();
969 }
970 }
971
972 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
973 TCGReg base, intptr_t ofs)
974 {
975 AArch64Insn insn;
976 int lgsz;
977
978 switch (type) {
979 case TCG_TYPE_I32:
980 insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
981 lgsz = 2;
982 break;
983 case TCG_TYPE_I64:
984 insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
985 lgsz = 3;
986 break;
987 case TCG_TYPE_V64:
988 insn = I3312_LDRVD;
989 lgsz = 3;
990 break;
991 case TCG_TYPE_V128:
992 insn = I3312_LDRVQ;
993 lgsz = 4;
994 break;
995 default:
996 g_assert_not_reached();
997 }
998 tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
999 }
1000
1001 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1002 TCGReg base, intptr_t ofs)
1003 {
1004 AArch64Insn insn;
1005 int lgsz;
1006
1007 switch (type) {
1008 case TCG_TYPE_I32:
1009 insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1010 lgsz = 2;
1011 break;
1012 case TCG_TYPE_I64:
1013 insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1014 lgsz = 3;
1015 break;
1016 case TCG_TYPE_V64:
1017 insn = I3312_STRVD;
1018 lgsz = 3;
1019 break;
1020 case TCG_TYPE_V128:
1021 insn = I3312_STRVQ;
1022 lgsz = 4;
1023 break;
1024 default:
1025 g_assert_not_reached();
1026 }
1027 tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1028 }
1029
1030 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1031 TCGReg base, intptr_t ofs)
1032 {
1033 if (type <= TCG_TYPE_I64 && val == 0) {
1034 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1035 return true;
1036 }
1037 return false;
1038 }
1039
1040 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1041 TCGReg rn, unsigned int a, unsigned int b)
1042 {
1043 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1044 }
1045
1046 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1047 TCGReg rn, unsigned int a, unsigned int b)
1048 {
1049 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1050 }
1051
1052 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1053 TCGReg rn, unsigned int a, unsigned int b)
1054 {
1055 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1056 }
1057
1058 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1059 TCGReg rn, TCGReg rm, unsigned int a)
1060 {
1061 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1062 }
1063
1064 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1065 TCGReg rd, TCGReg rn, unsigned int m)
1066 {
1067 int bits = ext ? 64 : 32;
1068 int max = bits - 1;
1069 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1070 }
1071
1072 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1073 TCGReg rd, TCGReg rn, unsigned int m)
1074 {
1075 int max = ext ? 63 : 31;
1076 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1077 }
1078
1079 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1080 TCGReg rd, TCGReg rn, unsigned int m)
1081 {
1082 int max = ext ? 63 : 31;
1083 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1084 }
1085
1086 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1087 TCGReg rd, TCGReg rn, unsigned int m)
1088 {
1089 int max = ext ? 63 : 31;
1090 tcg_out_extr(s, ext, rd, rn, rn, m & max);
1091 }
1092
1093 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1094 TCGReg rd, TCGReg rn, unsigned int m)
1095 {
1096 int bits = ext ? 64 : 32;
1097 int max = bits - 1;
1098 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1099 }
1100
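/* Deposit RN into RD at LSB, using the BFI alias of BFM:
   immr = -lsb mod size, imms = width - 1. */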
1101 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1102 TCGReg rn, unsigned lsb, unsigned width)
1103 {
1104 unsigned size = ext ? 64 : 32;
1105 unsigned a = (size - lsb) & (size - 1);
1106 unsigned b = width - 1;
1107 tcg_out_bfm(s, ext, rd, rn, a, b);
1108 }
1109
1110 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1111 tcg_target_long b, bool const_b)
1112 {
1113 if (const_b) {
1114 /* Using CMP or CMN aliases. */
1115 if (b >= 0) {
1116 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1117 } else {
1118 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1119 }
1120 } else {
1121 /* Using CMP alias SUBS wzr, Wn, Wm */
1122 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1123 }
1124 }
1125
1126 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1127 {
1128 ptrdiff_t offset = target - s->code_ptr;
1129 tcg_debug_assert(offset == sextract64(offset, 0, 26));
1130 tcg_out_insn(s, 3206, B, offset);
1131 }
1132
1133 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1134 {
1135 ptrdiff_t offset = target - s->code_ptr;
1136 if (offset == sextract64(offset, 0, 26)) {
1137 tcg_out_insn(s, 3206, BL, offset);
1138 } else {
1139 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1140 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1141 }
1142 }
1143
1144 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1145 {
1146 tcg_out_insn(s, 3207, BLR, reg);
1147 }
1148
1149 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1150 {
1151 ptrdiff_t offset = target - s->code_ptr;
1152 if (offset == sextract64(offset, 0, 26)) {
1153 tcg_out_insn(s, 3206, BL, offset);
1154 } else {
1155 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1156 tcg_out_callr(s, TCG_REG_TMP);
1157 }
1158 }
1159
1160 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1161 uintptr_t addr)
1162 {
1163 tcg_insn_unit i1, i2;
1164 TCGType rt = TCG_TYPE_I64;
1165 TCGReg rd = TCG_REG_TMP;
1166 uint64_t pair;
1167
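/* The jump site is either a direct branch plus NOP or an ADRP+ADDI
   pair; both words are rewritten with a single aligned 64-bit store
   so that a concurrently executing cpu sees either the old or the
   new sequence (goto_tb aligns the pair to 8 bytes for this). */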
1168 ptrdiff_t offset = addr - jmp_addr;
1169
1170 if (offset == sextract64(offset, 0, 26)) {
1171 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1172 i2 = NOP;
1173 } else {
1174 offset = (addr >> 12) - (jmp_addr >> 12);
1175
1176 /* patch ADRP */
1177 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1178 /* patch ADDI */
1179 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1180 }
1181 pair = (uint64_t)i2 << 32 | i1;
1182 atomic_set((uint64_t *)jmp_addr, pair);
1183 flush_icache_range(jmp_addr, jmp_addr + 8);
1184 }
1185
1186 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1187 {
1188 if (!l->has_value) {
1189 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1190 tcg_out_insn(s, 3206, B, 0);
1191 } else {
1192 tcg_out_goto(s, l->u.value_ptr);
1193 }
1194 }
1195
1196 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1197 TCGArg b, bool b_const, TCGLabel *l)
1198 {
1199 intptr_t offset;
1200 bool need_cmp;
1201
1202 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1203 need_cmp = false;
1204 } else {
1205 need_cmp = true;
1206 tcg_out_cmp(s, ext, a, b, b_const);
1207 }
1208
1209 if (!l->has_value) {
1210 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1211 offset = tcg_in32(s) >> 5;
1212 } else {
1213 offset = l->u.value_ptr - s->code_ptr;
1214 tcg_debug_assert(offset == sextract64(offset, 0, 19));
1215 }
1216
1217 if (need_cmp) {
1218 tcg_out_insn(s, 3202, B_C, c, offset);
1219 } else if (c == TCG_COND_EQ) {
1220 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1221 } else {
1222 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1223 }
1224 }
1225
1226 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1227 {
1228 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1229 }
1230
1231 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1232 {
1233 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1234 }
1235
1236 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1237 {
1238 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1239 }
1240
1241 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
1242 TCGReg rd, TCGReg rn)
1243 {
1244 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1245 int bits = (8 << s_bits) - 1;
1246 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1247 }
1248
1249 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
1250 TCGReg rd, TCGReg rn)
1251 {
1252 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1253 int bits = (8 << s_bits) - 1;
1254 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1255 }
1256
1257 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1258 TCGReg rn, int64_t aimm)
1259 {
1260 if (aimm >= 0) {
1261 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1262 } else {
1263 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1264 }
1265 }
1266
1267 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1268 TCGReg rh, TCGReg al, TCGReg ah,
1269 tcg_target_long bl, tcg_target_long bh,
1270 bool const_bl, bool const_bh, bool sub)
1271 {
1272 TCGReg orig_rl = rl;
1273 AArch64Insn insn;
1274
1275 if (rl == ah || (!const_bh && rl == bh)) {
1276 rl = TCG_REG_TMP;
1277 }
1278
1279 if (const_bl) {
1280 insn = I3401_ADDSI;
1281 if ((bl < 0) ^ sub) {
1282 insn = I3401_SUBSI;
1283 bl = -bl;
1284 }
1285 if (unlikely(al == TCG_REG_XZR)) {
1286 /* ??? We want to allow al to be zero for the benefit of
1287 negation via subtraction. However, that leaves open the
1288 possibility of adding 0+const in the low part, and the
1289 immediate add instructions encode XSP not XZR. Don't try
1290 anything more elaborate here than loading another zero. */
1291 al = TCG_REG_TMP;
1292 tcg_out_movi(s, ext, al, 0);
1293 }
1294 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1295 } else {
1296 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1297 }
1298
1299 insn = I3503_ADC;
1300 if (const_bh) {
1301 /* Note that the only two constants we support are 0 and -1, and
1302 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1303 if ((bh != 0) ^ sub) {
1304 insn = I3503_SBC;
1305 }
1306 bh = TCG_REG_XZR;
1307 } else if (sub) {
1308 insn = I3503_SBC;
1309 }
1310 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1311
1312 tcg_out_mov(s, ext, orig_rl, rl);
1313 }
1314
1315 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1316 {
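/* DMB ISHLD orders earlier loads against later loads and stores,
   ISHST orders earlier stores against later stores; anything that
   needs store-to-load ordering takes the full DMB ISH. */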
1317 static const uint32_t sync[] = {
1318 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1319 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1320 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1321 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1322 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1323 };
1324 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1325 }
1326
1327 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1328 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1329 {
1330 TCGReg a1 = a0;
1331 if (is_ctz) {
1332 a1 = TCG_REG_TMP;
1333 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1334 }
1335 if (const_b && b == (ext ? 64 : 32)) {
1336 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1337 } else {
1338 AArch64Insn sel = I3506_CSEL;
1339
1340 tcg_out_cmp(s, ext, a0, 0, 1);
1341 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1342
1343 if (const_b) {
1344 if (b == -1) {
1345 b = TCG_REG_XZR;
1346 sel = I3506_CSINV;
1347 } else if (b == 0) {
1348 b = TCG_REG_XZR;
1349 } else {
1350 tcg_out_movi(s, ext, d, b);
1351 b = d;
1352 }
1353 }
1354 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1355 }
1356 }
1357
1358 #ifdef CONFIG_SOFTMMU
1359 #include "tcg-ldst.inc.c"
1360
1361 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1362 * TCGMemOpIdx oi, uintptr_t ra)
1363 */
1364 static void * const qemu_ld_helpers[16] = {
1365 [MO_UB] = helper_ret_ldub_mmu,
1366 [MO_LEUW] = helper_le_lduw_mmu,
1367 [MO_LEUL] = helper_le_ldul_mmu,
1368 [MO_LEQ] = helper_le_ldq_mmu,
1369 [MO_BEUW] = helper_be_lduw_mmu,
1370 [MO_BEUL] = helper_be_ldul_mmu,
1371 [MO_BEQ] = helper_be_ldq_mmu,
1372 };
1373
1374 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1375 * uintxx_t val, TCGMemOpIdx oi,
1376 * uintptr_t ra)
1377 */
1378 static void * const qemu_st_helpers[16] = {
1379 [MO_UB] = helper_ret_stb_mmu,
1380 [MO_LEUW] = helper_le_stw_mmu,
1381 [MO_LEUL] = helper_le_stl_mmu,
1382 [MO_LEQ] = helper_le_stq_mmu,
1383 [MO_BEUW] = helper_be_stw_mmu,
1384 [MO_BEUL] = helper_be_stl_mmu,
1385 [MO_BEQ] = helper_be_stq_mmu,
1386 };
1387
1388 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1389 {
1390 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1391 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1392 tcg_out_insn(s, 3406, ADR, rd, offset);
1393 }
1394
1395 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1396 {
1397 TCGMemOpIdx oi = lb->oi;
1398 TCGMemOp opc = get_memop(oi);
1399 TCGMemOp size = opc & MO_SIZE;
1400
1401 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1402 tcg_debug_assert(ok);
1403
1404 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1405 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1406 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1407 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1408 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1409 if (opc & MO_SIGN) {
1410 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1411 } else {
1412 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1413 }
1414
1415 tcg_out_goto(s, lb->raddr);
1416 }
1417
1418 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1419 {
1420 TCGMemOpIdx oi = lb->oi;
1421 TCGMemOp opc = get_memop(oi);
1422 TCGMemOp size = opc & MO_SIZE;
1423
1424 bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
1425 tcg_debug_assert(ok);
1426
1427 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1428 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1429 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1430 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1431 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1432 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1433 tcg_out_goto(s, lb->raddr);
1434 }
1435
1436 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1437 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1438 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1439 {
1440 TCGLabelQemuLdst *label = new_ldst_label(s);
1441
1442 label->is_ld = is_ld;
1443 label->oi = oi;
1444 label->type = ext;
1445 label->datalo_reg = data_reg;
1446 label->addrlo_reg = addr_reg;
1447 label->raddr = raddr;
1448 label->label_ptr[0] = label_ptr;
1449 }
1450
1451 /* Load and compare a TLB entry, emitting the conditional jump to the
1452 slow path for the failure case, which will be patched later when finalizing
1453 the slow path. Generated code returns the host addend in X1,
1454 clobbers X0,X2,X3,TMP. */
1455 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1456 tcg_insn_unit **label_ptr, int mem_index,
1457 bool is_read)
1458 {
1459 int tlb_offset = is_read ?
1460 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1461 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1462 unsigned a_bits = get_alignment_bits(opc);
1463 unsigned s_bits = opc & MO_SIZE;
1464 unsigned a_mask = (1u << a_bits) - 1;
1465 unsigned s_mask = (1u << s_bits) - 1;
1466 TCGReg base = TCG_AREG0, x3;
1467 uint64_t tlb_mask;
1468
1469 /* For aligned accesses, we check the first byte and include the alignment
1470 bits within the address. For unaligned access, we check that we don't
1471 cross pages using the address of the last byte of the access. */
1472 if (a_bits >= s_bits) {
1473 x3 = addr_reg;
1474 } else {
1475 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1476 TCG_REG_X3, addr_reg, s_mask - a_mask);
1477 x3 = TCG_REG_X3;
1478 }
1479 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1480
1481 /* Extract the TLB index from the address into X0.
1482 X0<CPU_TLB_BITS:0> =
1483 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1484 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1485 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1486
1487 /* Store the page mask part of the address into X3. */
1488 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1489 TCG_REG_X3, x3, tlb_mask);
1490
1491 /* Add any "high bits" from the tlb offset to the env address into X2,
1492 to take advantage of the LSL12 form of the ADDI instruction.
1493 X2 = env + (tlb_offset & 0xfff000) */
1494 if (tlb_offset & 0xfff000) {
1495 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1496 tlb_offset & 0xfff000);
1497 base = TCG_REG_X2;
1498 }
1499
1500 /* Merge the tlb index contribution into X2.
1501 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1502 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1503 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1504
1505 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1506 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1507 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1508 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
1509 TARGET_LONG_BITS == 32 ? 2 : 3);
1510
1511 /* Load the tlb addend. Do that early to avoid stalling.
1512 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1513 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1514 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1515 (is_read ? offsetof(CPUTLBEntry, addr_read)
1516 : offsetof(CPUTLBEntry, addr_write)), 3);
1517
1518 /* Perform the address comparison. */
1519 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1520
1521 /* If not equal, we jump to the slow path. */
1522 *label_ptr = s->code_ptr;
1523 tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1524 }
1525
1526 #endif /* CONFIG_SOFTMMU */
1527
1528 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1529 TCGReg data_r, TCGReg addr_r,
1530 TCGType otype, TCGReg off_r)
1531 {
1532 const TCGMemOp bswap = memop & MO_BSWAP;
1533
1534 switch (memop & MO_SSIZE) {
1535 case MO_UB:
1536 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1537 break;
1538 case MO_SB:
1539 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1540 data_r, addr_r, otype, off_r);
1541 break;
1542 case MO_UW:
1543 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1544 if (bswap) {
1545 tcg_out_rev16(s, data_r, data_r);
1546 }
1547 break;
1548 case MO_SW:
1549 if (bswap) {
1550 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1551 tcg_out_rev16(s, data_r, data_r);
1552 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1553 } else {
1554 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1555 data_r, addr_r, otype, off_r);
1556 }
1557 break;
1558 case MO_UL:
1559 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1560 if (bswap) {
1561 tcg_out_rev32(s, data_r, data_r);
1562 }
1563 break;
1564 case MO_SL:
1565 if (bswap) {
1566 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1567 tcg_out_rev32(s, data_r, data_r);
1568 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1569 } else {
1570 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1571 }
1572 break;
1573 case MO_Q:
1574 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1575 if (bswap) {
1576 tcg_out_rev64(s, data_r, data_r);
1577 }
1578 break;
1579 default:
1580 tcg_abort();
1581 }
1582 }
1583
1584 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1585 TCGReg data_r, TCGReg addr_r,
1586 TCGType otype, TCGReg off_r)
1587 {
1588 const TCGMemOp bswap = memop & MO_BSWAP;
1589
1590 switch (memop & MO_SIZE) {
1591 case MO_8:
1592 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1593 break;
1594 case MO_16:
1595 if (bswap && data_r != TCG_REG_XZR) {
1596 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1597 data_r = TCG_REG_TMP;
1598 }
1599 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1600 break;
1601 case MO_32:
1602 if (bswap && data_r != TCG_REG_XZR) {
1603 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1604 data_r = TCG_REG_TMP;
1605 }
1606 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1607 break;
1608 case MO_64:
1609 if (bswap && data_r != TCG_REG_XZR) {
1610 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1611 data_r = TCG_REG_TMP;
1612 }
1613 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1614 break;
1615 default:
1616 tcg_abort();
1617 }
1618 }
1619
1620 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1621 TCGMemOpIdx oi, TCGType ext)
1622 {
1623 TCGMemOp memop = get_memop(oi);
1624 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1625 #ifdef CONFIG_SOFTMMU
1626 unsigned mem_index = get_mmuidx(oi);
1627 tcg_insn_unit *label_ptr;
1628
1629 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1630 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1631 TCG_REG_X1, otype, addr_reg);
1632 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1633 s->code_ptr, label_ptr);
1634 #else /* !CONFIG_SOFTMMU */
1635 if (USE_GUEST_BASE) {
1636 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1637 TCG_REG_GUEST_BASE, otype, addr_reg);
1638 } else {
1639 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1640 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1641 }
1642 #endif /* CONFIG_SOFTMMU */
1643 }
1644
1645 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1646 TCGMemOpIdx oi)
1647 {
1648 TCGMemOp memop = get_memop(oi);
1649 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1650 #ifdef CONFIG_SOFTMMU
1651 unsigned mem_index = get_mmuidx(oi);
1652 tcg_insn_unit *label_ptr;
1653
1654 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1655 tcg_out_qemu_st_direct(s, memop, data_reg,
1656 TCG_REG_X1, otype, addr_reg);
1657 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1658 data_reg, addr_reg, s->code_ptr, label_ptr);
1659 #else /* !CONFIG_SOFTMMU */
1660 if (USE_GUEST_BASE) {
1661 tcg_out_qemu_st_direct(s, memop, data_reg,
1662 TCG_REG_GUEST_BASE, otype, addr_reg);
1663 } else {
1664 tcg_out_qemu_st_direct(s, memop, data_reg,
1665 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1666 }
1667 #endif /* CONFIG_SOFTMMU */
1668 }
1669
1670 static tcg_insn_unit *tb_ret_addr;
1671
1672 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1673 const TCGArg args[TCG_MAX_OP_ARGS],
1674 const int const_args[TCG_MAX_OP_ARGS])
1675 {
1676 /* 99% of the time, we can signal the use of extension registers
1677 by looking to see if the opcode handles 64-bit data. */
1678 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1679
1680 /* Hoist the loads of the most common arguments. */
1681 TCGArg a0 = args[0];
1682 TCGArg a1 = args[1];
1683 TCGArg a2 = args[2];
1684 int c2 = const_args[2];
1685
1686 /* Some operands are defined with "rZ" constraint, a register or
1687 the zero register. These need not actually test args[I] == 0. */
1688 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1689
1690 switch (opc) {
1691 case INDEX_op_exit_tb:
1692 /* Reuse the zeroing that exists for goto_ptr. */
1693 if (a0 == 0) {
1694 tcg_out_goto_long(s, s->code_gen_epilogue);
1695 } else {
1696 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1697 tcg_out_goto_long(s, tb_ret_addr);
1698 }
1699 break;
1700
1701 case INDEX_op_goto_tb:
1702 if (s->tb_jmp_insn_offset != NULL) {
1703 /* TCG_TARGET_HAS_direct_jump */
1704 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1705 write can be used to patch the target address. */
1706 if ((uintptr_t)s->code_ptr & 7) {
1707 tcg_out32(s, NOP);
1708 }
1709 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1710 /* actual branch destination will be patched by
1711 tb_target_set_jmp_target later. */
1712 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1713 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1714 } else {
1715 /* !TCG_TARGET_HAS_direct_jump */
1716 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1717 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1718 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1719 }
1720 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1721 set_jmp_reset_offset(s, a0);
1722 break;
1723
1724 case INDEX_op_goto_ptr:
1725 tcg_out_insn(s, 3207, BR, a0);
1726 break;
1727
1728 case INDEX_op_br:
1729 tcg_out_goto_label(s, arg_label(a0));
1730 break;
1731
1732 case INDEX_op_ld8u_i32:
1733 case INDEX_op_ld8u_i64:
1734 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1735 break;
1736 case INDEX_op_ld8s_i32:
1737 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1738 break;
1739 case INDEX_op_ld8s_i64:
1740 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1741 break;
1742 case INDEX_op_ld16u_i32:
1743 case INDEX_op_ld16u_i64:
1744 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1745 break;
1746 case INDEX_op_ld16s_i32:
1747 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1748 break;
1749 case INDEX_op_ld16s_i64:
1750 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1751 break;
1752 case INDEX_op_ld_i32:
1753 case INDEX_op_ld32u_i64:
1754 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1755 break;
1756 case INDEX_op_ld32s_i64:
1757 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1758 break;
1759 case INDEX_op_ld_i64:
1760 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1761 break;
1762
1763 case INDEX_op_st8_i32:
1764 case INDEX_op_st8_i64:
1765 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1766 break;
1767 case INDEX_op_st16_i32:
1768 case INDEX_op_st16_i64:
1769 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1770 break;
1771 case INDEX_op_st_i32:
1772 case INDEX_op_st32_i64:
1773 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1774 break;
1775 case INDEX_op_st_i64:
1776 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1777 break;
1778
1779 case INDEX_op_add_i32:
1780 a2 = (int32_t)a2;
1781 /* FALLTHRU */
1782 case INDEX_op_add_i64:
1783 if (c2) {
1784 tcg_out_addsubi(s, ext, a0, a1, a2);
1785 } else {
1786 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1787 }
1788 break;
1789
1790 case INDEX_op_sub_i32:
1791 a2 = (int32_t)a2;
1792 /* FALLTHRU */
1793 case INDEX_op_sub_i64:
1794 if (c2) {
1795 tcg_out_addsubi(s, ext, a0, a1, -a2);
1796 } else {
1797 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1798 }
1799 break;
1800
1801 case INDEX_op_neg_i64:
1802 case INDEX_op_neg_i32:
1803 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1804 break;
1805
1806 case INDEX_op_and_i32:
1807 a2 = (int32_t)a2;
1808 /* FALLTHRU */
1809 case INDEX_op_and_i64:
1810 if (c2) {
1811 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1812 } else {
1813 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1814 }
1815 break;
1816
1817 case INDEX_op_andc_i32:
1818 a2 = (int32_t)a2;
1819 /* FALLTHRU */
1820 case INDEX_op_andc_i64:
1821 if (c2) {
1822 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1823 } else {
1824 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1825 }
1826 break;
1827
1828 case INDEX_op_or_i32:
1829 a2 = (int32_t)a2;
1830 /* FALLTHRU */
1831 case INDEX_op_or_i64:
1832 if (c2) {
1833 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1834 } else {
1835 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1836 }
1837 break;
1838
1839 case INDEX_op_orc_i32:
1840 a2 = (int32_t)a2;
1841 /* FALLTHRU */
1842 case INDEX_op_orc_i64:
1843 if (c2) {
1844 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1845 } else {
1846 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1847 }
1848 break;
1849
1850 case INDEX_op_xor_i32:
1851 a2 = (int32_t)a2;
1852 /* FALLTHRU */
1853 case INDEX_op_xor_i64:
1854 if (c2) {
1855 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1856 } else {
1857 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1858 }
1859 break;
1860
1861 case INDEX_op_eqv_i32:
1862 a2 = (int32_t)a2;
1863 /* FALLTHRU */
1864 case INDEX_op_eqv_i64:
1865 if (c2) {
1866 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1867 } else {
1868 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1869 }
1870 break;
1871
1872 case INDEX_op_not_i64:
1873 case INDEX_op_not_i32:
1874 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1875 break;
1876
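/* There is no plain multiply instruction; use MADD with the zero
   register as the addend. */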
1877 case INDEX_op_mul_i64:
1878 case INDEX_op_mul_i32:
1879 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1880 break;
1881
1882 case INDEX_op_div_i64:
1883 case INDEX_op_div_i32:
1884 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1885 break;
1886 case INDEX_op_divu_i64:
1887 case INDEX_op_divu_i32:
1888 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1889 break;
1890
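/* AArch64 has no integer remainder instruction; compute it as
   a1 - (a1 / a2) * a2 with a divide followed by MSUB. With a0=x0,
   a1=x1, a2=x2, rem_i64 emits "sdiv tmp, x1, x2; msub x0, tmp, x2, x1". */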
1891 case INDEX_op_rem_i64:
1892 case INDEX_op_rem_i32:
1893 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1894 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1895 break;
1896 case INDEX_op_remu_i64:
1897 case INDEX_op_remu_i32:
1898 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1899 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1900 break;
1901
1902 case INDEX_op_shl_i64:
1903 case INDEX_op_shl_i32:
1904 if (c2) {
1905 tcg_out_shl(s, ext, a0, a1, a2);
1906 } else {
1907 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1908 }
1909 break;
1910
1911 case INDEX_op_shr_i64:
1912 case INDEX_op_shr_i32:
1913 if (c2) {
1914 tcg_out_shr(s, ext, a0, a1, a2);
1915 } else {
1916 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1917 }
1918 break;
1919
1920 case INDEX_op_sar_i64:
1921 case INDEX_op_sar_i32:
1922 if (c2) {
1923 tcg_out_sar(s, ext, a0, a1, a2);
1924 } else {
1925 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1926 }
1927 break;
1928
1929 case INDEX_op_rotr_i64:
1930 case INDEX_op_rotr_i32:
1931 if (c2) {
1932 tcg_out_rotr(s, ext, a0, a1, a2);
1933 } else {
1934 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1935 }
1936 break;
1937
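/* AArch64 only rotates right. A constant rotate-left is folded by
   tcg_out_rotl; a variable amount is handled by rotating right by
   the negated amount. */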
1938 case INDEX_op_rotl_i64:
1939 case INDEX_op_rotl_i32:
1940 if (c2) {
1941 tcg_out_rotl(s, ext, a0, a1, a2);
1942 } else {
1943 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1944 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1945 }
1946 break;
1947
1948 case INDEX_op_clz_i64:
1949 case INDEX_op_clz_i32:
1950 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1951 break;
1952 case INDEX_op_ctz_i64:
1953 case INDEX_op_ctz_i32:
1954 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1955 break;
1956
1957 case INDEX_op_brcond_i32:
1958 a1 = (int32_t)a1;
1959 /* FALLTHRU */
1960 case INDEX_op_brcond_i64:
1961 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1962 break;
1963
1964 case INDEX_op_setcond_i32:
1965 a2 = (int32_t)a2;
1966 /* FALLTHRU */
1967 case INDEX_op_setcond_i64:
1968 tcg_out_cmp(s, ext, a1, a2, c2);
1969 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1970 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1971 TCG_REG_XZR, tcg_invert_cond(args[3]));
1972 break;
1973
1974 case INDEX_op_movcond_i32:
1975 a2 = (int32_t)a2;
1976 /* FALLTHRU */
1977 case INDEX_op_movcond_i64:
1978 tcg_out_cmp(s, ext, a1, a2, c2);
1979 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1980 break;
1981
1982 case INDEX_op_qemu_ld_i32:
1983 case INDEX_op_qemu_ld_i64:
1984 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1985 break;
1986 case INDEX_op_qemu_st_i32:
1987 case INDEX_op_qemu_st_i64:
1988 tcg_out_qemu_st(s, REG0(0), a1, a2);
1989 break;
1990
1991 case INDEX_op_bswap64_i64:
1992 tcg_out_rev64(s, a0, a1);
1993 break;
1994 case INDEX_op_bswap32_i64:
1995 case INDEX_op_bswap32_i32:
1996 tcg_out_rev32(s, a0, a1);
1997 break;
1998 case INDEX_op_bswap16_i64:
1999 case INDEX_op_bswap16_i32:
2000 tcg_out_rev16(s, a0, a1);
2001 break;
2002
2003 case INDEX_op_ext8s_i64:
2004 case INDEX_op_ext8s_i32:
2005 tcg_out_sxt(s, ext, MO_8, a0, a1);
2006 break;
2007 case INDEX_op_ext16s_i64:
2008 case INDEX_op_ext16s_i32:
2009 tcg_out_sxt(s, ext, MO_16, a0, a1);
2010 break;
2011 case INDEX_op_ext_i32_i64:
2012 case INDEX_op_ext32s_i64:
2013 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2014 break;
2015 case INDEX_op_ext8u_i64:
2016 case INDEX_op_ext8u_i32:
2017 tcg_out_uxt(s, MO_8, a0, a1);
2018 break;
2019 case INDEX_op_ext16u_i64:
2020 case INDEX_op_ext16u_i32:
2021 tcg_out_uxt(s, MO_16, a0, a1);
2022 break;
2023 case INDEX_op_extu_i32_i64:
2024 case INDEX_op_ext32u_i64:
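/* A 32-bit register-to-register move implicitly zero-extends into
   the full 64-bit register. */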
2025 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2026 break;
2027
2028 case INDEX_op_deposit_i64:
2029 case INDEX_op_deposit_i32:
2030 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2031 break;
2032
2033 case INDEX_op_extract_i64:
2034 case INDEX_op_extract_i32:
2035 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2036 break;
2037
2038 case INDEX_op_sextract_i64:
2039 case INDEX_op_sextract_i32:
2040 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2041 break;
2042
2043 case INDEX_op_add2_i32:
2044 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2045 (int32_t)args[4], args[5], const_args[4],
2046 const_args[5], false);
2047 break;
2048 case INDEX_op_add2_i64:
2049 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2050 args[5], const_args[4], const_args[5], false);
2051 break;
2052 case INDEX_op_sub2_i32:
2053 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2054 (int32_t)args[4], args[5], const_args[4],
2055 const_args[5], true);
2056 break;
2057 case INDEX_op_sub2_i64:
2058 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2059 args[5], const_args[4], const_args[5], true);
2060 break;
2061
2062 case INDEX_op_muluh_i64:
2063 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2064 break;
2065 case INDEX_op_mulsh_i64:
2066 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2067 break;
2068
2069 case INDEX_op_mb:
2070 tcg_out_mb(s, a0);
2071 break;
2072
2073 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2074 case INDEX_op_mov_i64:
2075 case INDEX_op_mov_vec:
2076 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2077 case INDEX_op_movi_i64:
2078 case INDEX_op_dupi_vec:
2079 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2080 default:
2081 g_assert_not_reached();
2082 }
2083
2084 #undef REG0
2085 }
2086
2087 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2088 unsigned vecl, unsigned vece,
2089 const TCGArg *args, const int *const_args)
2090 {
2091 static const AArch64Insn cmp_insn[16] = {
2092 [TCG_COND_EQ] = I3616_CMEQ,
2093 [TCG_COND_GT] = I3616_CMGT,
2094 [TCG_COND_GE] = I3616_CMGE,
2095 [TCG_COND_GTU] = I3616_CMHI,
2096 [TCG_COND_GEU] = I3616_CMHS,
2097 };
2098 static const AArch64Insn cmp0_insn[16] = {
2099 [TCG_COND_EQ] = I3617_CMEQ0,
2100 [TCG_COND_GT] = I3617_CMGT0,
2101 [TCG_COND_GE] = I3617_CMGE0,
2102 [TCG_COND_LT] = I3617_CMLT0,
2103 [TCG_COND_LE] = I3617_CMLE0,
2104 };
2105
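/* vecl is 0 for a 64-bit vector and 1 for a 128-bit vector, so it can
   be used directly as the AdvSIMD Q bit. */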
2106 TCGType type = vecl + TCG_TYPE_V64;
2107 unsigned is_q = vecl;
2108 TCGArg a0, a1, a2;
2109
2110 a0 = args[0];
2111 a1 = args[1];
2112 a2 = args[2];
2113
2114 switch (opc) {
2115 case INDEX_op_ld_vec:
2116 tcg_out_ld(s, type, a0, a1, a2);
2117 break;
2118 case INDEX_op_st_vec:
2119 tcg_out_st(s, type, a0, a1, a2);
2120 break;
2121 case INDEX_op_add_vec:
2122 tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2123 break;
2124 case INDEX_op_sub_vec:
2125 tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2126 break;
2127 case INDEX_op_mul_vec:
2128 tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2129 break;
2130 case INDEX_op_neg_vec:
2131 tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2132 break;
2133 case INDEX_op_and_vec:
2134 tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2135 break;
2136 case INDEX_op_or_vec:
2137 tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2138 break;
2139 case INDEX_op_xor_vec:
2140 tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2141 break;
2142 case INDEX_op_andc_vec:
2143 tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2144 break;
2145 case INDEX_op_orc_vec:
2146 tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2147 break;
2148 case INDEX_op_ssadd_vec:
2149 tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2150 break;
2151 case INDEX_op_sssub_vec:
2152 tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2153 break;
2154 case INDEX_op_usadd_vec:
2155 tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2156 break;
2157 case INDEX_op_ussub_vec:
2158 tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2159 break;
2160 case INDEX_op_smax_vec:
2161 tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2162 break;
2163 case INDEX_op_smin_vec:
2164 tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2165 break;
2166 case INDEX_op_umax_vec:
2167 tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2168 break;
2169 case INDEX_op_umin_vec:
2170 tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2171 break;
2172 case INDEX_op_not_vec:
2173 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2174 break;
2175 case INDEX_op_dup_vec:
2176 tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
2177 break;
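/* The AdvSIMD shift-by-immediate encodings fold the element size into
   the immediate field: SHL encodes esize + shift, while USHR/SSHR
   encode 2 * esize - shift, with esize = 8 << vece. */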
2178 case INDEX_op_shli_vec:
2179 tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2180 break;
2181 case INDEX_op_shri_vec:
2182 tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2183 break;
2184 case INDEX_op_sari_vec:
2185 tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2186 break;
2187 case INDEX_op_cmp_vec:
2188 {
2189 TCGCond cond = args[3];
2190 AArch64Insn insn;
2191
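/* There is no NE vector comparison. Against zero, CMTST of the
   operand with itself sets exactly the non-zero lanes; otherwise
   compare for EQ and invert the result. */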
2192 if (cond == TCG_COND_NE) {
2193 if (const_args[2]) {
2194 tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2195 } else {
2196 tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2197 tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2198 }
2199 } else {
2200 if (const_args[2]) {
2201 insn = cmp0_insn[cond];
2202 if (insn) {
2203 tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2204 break;
2205 }
2206 tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2207 a2 = TCG_VEC_TMP;
2208 }
2209 insn = cmp_insn[cond];
2210 if (insn == 0) {
2211 TCGArg t;
2212 t = a1, a1 = a2, a2 = t;
2213 cond = tcg_swap_cond(cond);
2214 insn = cmp_insn[cond];
2215 tcg_debug_assert(insn != 0);
2216 }
2217 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2218 }
2219 }
2220 break;
2221 default:
2222 g_assert_not_reached();
2223 }
2224 }
2225
2226 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2227 {
2228 switch (opc) {
2229 case INDEX_op_add_vec:
2230 case INDEX_op_sub_vec:
2231 case INDEX_op_and_vec:
2232 case INDEX_op_or_vec:
2233 case INDEX_op_xor_vec:
2234 case INDEX_op_andc_vec:
2235 case INDEX_op_orc_vec:
2236 case INDEX_op_neg_vec:
2237 case INDEX_op_not_vec:
2238 case INDEX_op_cmp_vec:
2239 case INDEX_op_shli_vec:
2240 case INDEX_op_shri_vec:
2241 case INDEX_op_sari_vec:
2242 case INDEX_op_ssadd_vec:
2243 case INDEX_op_sssub_vec:
2244 case INDEX_op_usadd_vec:
2245 case INDEX_op_ussub_vec:
2246 case INDEX_op_smax_vec:
2247 case INDEX_op_smin_vec:
2248 case INDEX_op_umax_vec:
2249 case INDEX_op_umin_vec:
2250 return 1;
2251 case INDEX_op_mul_vec:
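/* AdvSIMD integer multiply does not support 64-bit elements. */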
2252 return vece < MO_64;
2253
2254 default:
2255 return 0;
2256 }
2257 }
2258
2259 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2260 TCGArg a0, ...)
2261 {
2262 }
2263
2264 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2265 {
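/* The single-letter constraint classes used below ("r", "w", "l", "A",
   "L", "M", "Z", ...) are decoded by the target's constraint parser
   earlier in this file. */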
2266 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2267 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2268 static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2269 static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2270 static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2271 static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2272 static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2273 static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2274 static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2275 static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2276 static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2277 static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2278 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2279 static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2280 static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2281 static const TCGTargetOpDef r_r_rAL
2282 = { .args_ct_str = { "r", "r", "rAL" } };
2283 static const TCGTargetOpDef dep
2284 = { .args_ct_str = { "r", "0", "rZ" } };
2285 static const TCGTargetOpDef movc
2286 = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2287 static const TCGTargetOpDef add2
2288 = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2289
2290 switch (op) {
2291 case INDEX_op_goto_ptr:
2292 return &r;
2293
2294 case INDEX_op_ld8u_i32:
2295 case INDEX_op_ld8s_i32:
2296 case INDEX_op_ld16u_i32:
2297 case INDEX_op_ld16s_i32:
2298 case INDEX_op_ld_i32:
2299 case INDEX_op_ld8u_i64:
2300 case INDEX_op_ld8s_i64:
2301 case INDEX_op_ld16u_i64:
2302 case INDEX_op_ld16s_i64:
2303 case INDEX_op_ld32u_i64:
2304 case INDEX_op_ld32s_i64:
2305 case INDEX_op_ld_i64:
2306 case INDEX_op_neg_i32:
2307 case INDEX_op_neg_i64:
2308 case INDEX_op_not_i32:
2309 case INDEX_op_not_i64:
2310 case INDEX_op_bswap16_i32:
2311 case INDEX_op_bswap32_i32:
2312 case INDEX_op_bswap16_i64:
2313 case INDEX_op_bswap32_i64:
2314 case INDEX_op_bswap64_i64:
2315 case INDEX_op_ext8s_i32:
2316 case INDEX_op_ext16s_i32:
2317 case INDEX_op_ext8u_i32:
2318 case INDEX_op_ext16u_i32:
2319 case INDEX_op_ext8s_i64:
2320 case INDEX_op_ext16s_i64:
2321 case INDEX_op_ext32s_i64:
2322 case INDEX_op_ext8u_i64:
2323 case INDEX_op_ext16u_i64:
2324 case INDEX_op_ext32u_i64:
2325 case INDEX_op_ext_i32_i64:
2326 case INDEX_op_extu_i32_i64:
2327 case INDEX_op_extract_i32:
2328 case INDEX_op_extract_i64:
2329 case INDEX_op_sextract_i32:
2330 case INDEX_op_sextract_i64:
2331 return &r_r;
2332
2333 case INDEX_op_st8_i32:
2334 case INDEX_op_st16_i32:
2335 case INDEX_op_st_i32:
2336 case INDEX_op_st8_i64:
2337 case INDEX_op_st16_i64:
2338 case INDEX_op_st32_i64:
2339 case INDEX_op_st_i64:
2340 return &rZ_r;
2341
2342 case INDEX_op_add_i32:
2343 case INDEX_op_add_i64:
2344 case INDEX_op_sub_i32:
2345 case INDEX_op_sub_i64:
2346 case INDEX_op_setcond_i32:
2347 case INDEX_op_setcond_i64:
2348 return &r_r_rA;
2349
2350 case INDEX_op_mul_i32:
2351 case INDEX_op_mul_i64:
2352 case INDEX_op_div_i32:
2353 case INDEX_op_div_i64:
2354 case INDEX_op_divu_i32:
2355 case INDEX_op_divu_i64:
2356 case INDEX_op_rem_i32:
2357 case INDEX_op_rem_i64:
2358 case INDEX_op_remu_i32:
2359 case INDEX_op_remu_i64:
2360 case INDEX_op_muluh_i64:
2361 case INDEX_op_mulsh_i64:
2362 return &r_r_r;
2363
2364 case INDEX_op_and_i32:
2365 case INDEX_op_and_i64:
2366 case INDEX_op_or_i32:
2367 case INDEX_op_or_i64:
2368 case INDEX_op_xor_i32:
2369 case INDEX_op_xor_i64:
2370 case INDEX_op_andc_i32:
2371 case INDEX_op_andc_i64:
2372 case INDEX_op_orc_i32:
2373 case INDEX_op_orc_i64:
2374 case INDEX_op_eqv_i32:
2375 case INDEX_op_eqv_i64:
2376 return &r_r_rL;
2377
2378 case INDEX_op_shl_i32:
2379 case INDEX_op_shr_i32:
2380 case INDEX_op_sar_i32:
2381 case INDEX_op_rotl_i32:
2382 case INDEX_op_rotr_i32:
2383 case INDEX_op_shl_i64:
2384 case INDEX_op_shr_i64:
2385 case INDEX_op_sar_i64:
2386 case INDEX_op_rotl_i64:
2387 case INDEX_op_rotr_i64:
2388 return &r_r_ri;
2389
2390 case INDEX_op_clz_i32:
2391 case INDEX_op_ctz_i32:
2392 case INDEX_op_clz_i64:
2393 case INDEX_op_ctz_i64:
2394 return &r_r_rAL;
2395
2396 case INDEX_op_brcond_i32:
2397 case INDEX_op_brcond_i64:
2398 return &r_rA;
2399
2400 case INDEX_op_movcond_i32:
2401 case INDEX_op_movcond_i64:
2402 return &movc;
2403
2404 case INDEX_op_qemu_ld_i32:
2405 case INDEX_op_qemu_ld_i64:
2406 return &r_l;
2407 case INDEX_op_qemu_st_i32:
2408 case INDEX_op_qemu_st_i64:
2409 return &lZ_l;
2410
2411 case INDEX_op_deposit_i32:
2412 case INDEX_op_deposit_i64:
2413 return &dep;
2414
2415 case INDEX_op_add2_i32:
2416 case INDEX_op_add2_i64:
2417 case INDEX_op_sub2_i32:
2418 case INDEX_op_sub2_i64:
2419 return &add2;
2420
2421 case INDEX_op_add_vec:
2422 case INDEX_op_sub_vec:
2423 case INDEX_op_mul_vec:
2424 case INDEX_op_and_vec:
2425 case INDEX_op_or_vec:
2426 case INDEX_op_xor_vec:
2427 case INDEX_op_andc_vec:
2428 case INDEX_op_orc_vec:
2429 case INDEX_op_ssadd_vec:
2430 case INDEX_op_sssub_vec:
2431 case INDEX_op_usadd_vec:
2432 case INDEX_op_ussub_vec:
2433 case INDEX_op_smax_vec:
2434 case INDEX_op_smin_vec:
2435 case INDEX_op_umax_vec:
2436 case INDEX_op_umin_vec:
2437 return &w_w_w;
2438 case INDEX_op_not_vec:
2439 case INDEX_op_neg_vec:
2440 case INDEX_op_shli_vec:
2441 case INDEX_op_shri_vec:
2442 case INDEX_op_sari_vec:
2443 return &w_w;
2444 case INDEX_op_ld_vec:
2445 case INDEX_op_st_vec:
2446 return &w_r;
2447 case INDEX_op_dup_vec:
2448 return &w_wr;
2449 case INDEX_op_cmp_vec:
2450 return &w_w_wZ;
2451
2452 default:
2453 return NULL;
2454 }
2455 }
2456
2457 static void tcg_target_init(TCGContext *s)
2458 {
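/* The low 32 bits of a register set select the general registers,
   the high 32 bits the vector registers. */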
2459 tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2460 tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2461 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2462 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2463
2464 tcg_target_call_clobber_regs = -1ull;
2465 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2466 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2467 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2468 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2469 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2470 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2471 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2472 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2473 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2474 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2475 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2476 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2477 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2478 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2479 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2480 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2481 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2482 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2483 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2484
2485 s->reserved_regs = 0;
2486 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2487 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2488 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2489 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2490 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2491 }
2492
2493 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
2494 #define PUSH_SIZE ((30 - 19 + 1) * 8)
2495
2496 #define FRAME_SIZE \
2497 ((PUSH_SIZE \
2498 + TCG_STATIC_CALL_ARGS_SIZE \
2499 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2500 + TCG_TARGET_STACK_ALIGN - 1) \
2501 & ~(TCG_TARGET_STACK_ALIGN - 1))
2502
2503 /* We're expecting a 2-byte uleb128-encoded value. */
2504 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2505
2506 /* We're expecting to use a single ADDI insn. */
2507 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2508
2509 static void tcg_target_qemu_prologue(TCGContext *s)
2510 {
2511 TCGReg r;
2512
2513 /* Push (FP, LR) and allocate space for all saved registers. */
2514 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2515 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2516
2517 /* Set up frame pointer for canonical unwinding. */
2518 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2519
2520 /* Store callee-preserved regs x19..x28. */
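/* The pair slot at offset 0 already holds (FP, LR) from the push
   above, hence the "+ 2" in the offset calculation. */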
2521 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2522 int ofs = (r - TCG_REG_X19 + 2) * 8;
2523 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2524 }
2525
2526 /* Make stack space for TCG locals. */
2527 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2528 FRAME_SIZE - PUSH_SIZE);
2529
2530 /* Tell TCG how to find its locals: base register, offset, and size. */
2531 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2532 CPU_TEMP_BUF_NLONGS * sizeof(long));
2533
2534 #if !defined(CONFIG_SOFTMMU)
2535 if (USE_GUEST_BASE) {
2536 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2537 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2538 }
2539 #endif
2540
2541 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2542 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2543
2544 /*
2545 * Return path for goto_ptr. Set the return value to 0, as exit_tb does,
2546 * and fall through to the rest of the epilogue.
2547 */
2548 s->code_gen_epilogue = s->code_ptr;
2549 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2550
2551 /* TB epilogue */
2552 tb_ret_addr = s->code_ptr;
2553
2554 /* Remove TCG locals stack space. */
2555 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2556 FRAME_SIZE - PUSH_SIZE);
2557
2558 /* Restore registers x19..x28. */
2559 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2560 int ofs = (r - TCG_REG_X19 + 2) * 8;
2561 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2562 }
2563
2564 /* Pop (FP, LR), restore SP to previous frame. */
2565 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2566 TCG_REG_SP, PUSH_SIZE, 0, 1);
2567 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2568 }
2569
2570 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2571 {
2572 int i;
2573 for (i = 0; i < count; ++i) {
2574 p[i] = NOP;
2575 }
2576 }
2577
2578 typedef struct {
2579 DebugFrameHeader h;
2580 uint8_t fde_def_cfa[4];
2581 uint8_t fde_reg_ofs[24];
2582 } DebugFrame;
2583
2584 #define ELF_HOST_MACHINE EM_AARCH64
2585
2586 static const DebugFrame debug_frame = {
2587 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2588 .h.cie.id = -1,
2589 .h.cie.version = 1,
2590 .h.cie.code_align = 1,
2591 .h.cie.data_align = 0x78, /* sleb128 -8 */
2592 .h.cie.return_column = TCG_REG_LR,
2593
2594 /* Total FDE size does not include the "len" member. */
2595 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2596
2597 .fde_def_cfa = {
2598 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2599 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2600 (FRAME_SIZE >> 7)
2601 },
2602 .fde_reg_ofs = {
2603 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2604 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2605 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2606 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2607 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2608 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2609 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2610 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2611 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2612 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2613 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2614 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2615 }
2616 };
2617
2618 void tcg_register_jit(void *buf, size_t buf_size)
2619 {
2620 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2621 }