tcg/arm/tcg-target.inc.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Andrzej Zaborowski
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "elf.h"
26 #include "tcg-be-ldst.h"
27
28 /* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
29 #ifndef __ARM_ARCH
30 # if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
31 || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
32 || defined(__ARM_ARCH_7EM__)
33 # define __ARM_ARCH 7
34 # elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
35 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
36 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
37 # define __ARM_ARCH 6
38 # elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
39 || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
40 || defined(__ARM_ARCH_5TEJ__)
41 # define __ARM_ARCH 5
42 # else
43 # define __ARM_ARCH 4
44 # endif
45 #endif
46
47 static int arm_arch = __ARM_ARCH;
48
49 #if defined(__ARM_ARCH_5T__) \
50 || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
51 # define use_armv5t_instructions 1
52 #else
53 # define use_armv5t_instructions use_armv6_instructions
54 #endif
55
56 #define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
57 #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
58
59 #ifndef use_idiv_instructions
60 bool use_idiv_instructions;
61 #endif
62
63 /* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined. */
64 #ifdef CONFIG_SOFTMMU
65 # define USING_SOFTMMU 1
66 #else
67 # define USING_SOFTMMU 0
68 #endif
69
70 #ifdef CONFIG_DEBUG_TCG
71 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
72 "%r0",
73 "%r1",
74 "%r2",
75 "%r3",
76 "%r4",
77 "%r5",
78 "%r6",
79 "%r7",
80 "%r8",
81 "%r9",
82 "%r10",
83 "%r11",
84 "%r12",
85 "%r13",
86 "%r14",
87 "%pc",
88 };
89 #endif
90
91 static const int tcg_target_reg_alloc_order[] = {
92 TCG_REG_R4,
93 TCG_REG_R5,
94 TCG_REG_R6,
95 TCG_REG_R7,
96 TCG_REG_R8,
97 TCG_REG_R9,
98 TCG_REG_R10,
99 TCG_REG_R11,
100 TCG_REG_R13,
101 TCG_REG_R0,
102 TCG_REG_R1,
103 TCG_REG_R2,
104 TCG_REG_R3,
105 TCG_REG_R12,
106 TCG_REG_R14,
107 };
108
109 static const int tcg_target_call_iarg_regs[4] = {
110 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
111 };
112 static const int tcg_target_call_oarg_regs[2] = {
113 TCG_REG_R0, TCG_REG_R1
114 };
115
116 #define TCG_REG_TMP TCG_REG_R12
117
118 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
119 {
120 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
121 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
122 }
123
124 static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
125 {
126 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
127 tcg_insn_unit insn = atomic_read(code_ptr);
128 tcg_debug_assert(offset == sextract32(offset, 0, 24));
129 atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
130 }
131
132 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
133 intptr_t value, intptr_t addend)
134 {
135 tcg_debug_assert(type == R_ARM_PC24);
136 tcg_debug_assert(addend == 0);
137 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
138 }
139
140 #define TCG_CT_CONST_ARM 0x100
141 #define TCG_CT_CONST_INV 0x200
142 #define TCG_CT_CONST_NEG 0x400
143 #define TCG_CT_CONST_ZERO 0x800
144
145 /* parse target specific constraints */
146 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
147 {
148 const char *ct_str;
149
150 ct_str = *pct_str;
151 switch (ct_str[0]) {
152 case 'I':
153 ct->ct |= TCG_CT_CONST_ARM;
154 break;
155 case 'K':
156 ct->ct |= TCG_CT_CONST_INV;
157 break;
158 case 'N': /* The gcc constraint letter is L, already used here. */
159 ct->ct |= TCG_CT_CONST_NEG;
160 break;
161 case 'Z':
162 ct->ct |= TCG_CT_CONST_ZERO;
163 break;
164
165 case 'r':
166 ct->ct |= TCG_CT_REG;
167 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
168 break;
169
170 /* qemu_ld address */
171 case 'l':
172 ct->ct |= TCG_CT_REG;
173 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
174 #ifdef CONFIG_SOFTMMU
175 /* r0-r2,lr will be overwritten when reading the tlb entry,
176 so don't use these. */
177 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
178 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
179 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
180 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
181 #endif
182 break;
183
184 /* qemu_st address & data */
185 case 's':
186 ct->ct |= TCG_CT_REG;
187 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
188 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only),
189 and r0-r1 when doing the byte swapping, so don't use these. */
190 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
191 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
192 #if defined(CONFIG_SOFTMMU)
193 /* Avoid clashes with registers being used for helper args */
194 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
195 #if TARGET_LONG_BITS == 64
196 /* Avoid clashes with registers being used for helper args */
197 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
198 #endif
199 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
200 #endif
201 break;
202
203 default:
204 return -1;
205 }
206 ct_str++;
207 *pct_str = ct_str;
208
209 return 0;
210 }
211
212 static inline uint32_t rotl(uint32_t val, int n)
213 {
214 return (val << n) | (val >> (32 - n));
215 }
216
217 /* ARM immediates for ALU instructions are made of an unsigned 8-bit value
218 right-rotated by an even amount between 0 and 30. */
219 static inline int encode_imm(uint32_t imm)
220 {
221 int shift;
222
223 /* simple case, only lower bits */
224 if ((imm & ~0xff) == 0)
225 return 0;
226 /* then try a simple even shift */
227 shift = ctz32(imm) & ~1;
228 if (((imm >> shift) & ~0xff) == 0)
229 return 32 - shift;
230 /* now try harder with rotations */
231 if ((rotl(imm, 2) & ~0xff) == 0)
232 return 2;
233 if ((rotl(imm, 4) & ~0xff) == 0)
234 return 4;
235 if ((rotl(imm, 6) & ~0xff) == 0)
236 return 6;
237 /* imm can't be encoded */
238 return -1;
239 }
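
/* Worked examples for encode_imm() above:
 *   encode_imm(0x000000ab) ==  0   (already fits in the low 8 bits)
 *   encode_imm(0x0003fc00) == 22   (0xff << 10; rotating left by 22 yields
 *                                   0xff, so the insn uses imm8=0xff
 *                                   rotated right by 22)
 *   encode_imm(0xf000000f) ==  4   (wraps around bit 31, caught by the
 *                                   explicit rotation checks at the end)
 *   encode_imm(0x00000101) == -1   (not expressible as a rotated byte)
 */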
240
241 static inline int check_fit_imm(uint32_t imm)
242 {
243 return encode_imm(imm) >= 0;
244 }
245
246 /* Test if a constant matches the constraint.
247 * TODO: define constraints for:
248 *
249 * ldr/str offset: between -0xfff and 0xfff
250 * ldrh/strh offset: between -0xff and 0xff
251 * mov operand2: values represented with x << (2 * y), x < 0x100
252 * add, sub, eor...: ditto
253 */
254 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
255 const TCGArgConstraint *arg_ct)
256 {
257 int ct;
258 ct = arg_ct->ct;
259 if (ct & TCG_CT_CONST) {
260 return 1;
261 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
262 return 1;
263 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
264 return 1;
265 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
266 return 1;
267 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
268 return 1;
269 } else {
270 return 0;
271 }
272 }
273
274 #define TO_CPSR (1 << 20)
275
276 typedef enum {
277 ARITH_AND = 0x0 << 21,
278 ARITH_EOR = 0x1 << 21,
279 ARITH_SUB = 0x2 << 21,
280 ARITH_RSB = 0x3 << 21,
281 ARITH_ADD = 0x4 << 21,
282 ARITH_ADC = 0x5 << 21,
283 ARITH_SBC = 0x6 << 21,
284 ARITH_RSC = 0x7 << 21,
285 ARITH_TST = 0x8 << 21 | TO_CPSR,
286 ARITH_CMP = 0xa << 21 | TO_CPSR,
287 ARITH_CMN = 0xb << 21 | TO_CPSR,
288 ARITH_ORR = 0xc << 21,
289 ARITH_MOV = 0xd << 21,
290 ARITH_BIC = 0xe << 21,
291 ARITH_MVN = 0xf << 21,
292
293 INSN_LDR_IMM = 0x04100000,
294 INSN_LDR_REG = 0x06100000,
295 INSN_STR_IMM = 0x04000000,
296 INSN_STR_REG = 0x06000000,
297
298 INSN_LDRH_IMM = 0x005000b0,
299 INSN_LDRH_REG = 0x001000b0,
300 INSN_LDRSH_IMM = 0x005000f0,
301 INSN_LDRSH_REG = 0x001000f0,
302 INSN_STRH_IMM = 0x004000b0,
303 INSN_STRH_REG = 0x000000b0,
304
305 INSN_LDRB_IMM = 0x04500000,
306 INSN_LDRB_REG = 0x06500000,
307 INSN_LDRSB_IMM = 0x005000d0,
308 INSN_LDRSB_REG = 0x001000d0,
309 INSN_STRB_IMM = 0x04400000,
310 INSN_STRB_REG = 0x06400000,
311
312 INSN_LDRD_IMM = 0x004000d0,
313 INSN_LDRD_REG = 0x000000d0,
314 INSN_STRD_IMM = 0x004000f0,
315 INSN_STRD_REG = 0x000000f0,
316 } ARMInsn;
317
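/* Operand2 shift encodings for data-processing insns: bits [6:5] select the
 * shift type (LSL/LSR/ASR/ROR), bit [4] selects an immediate (0) or register
 * (1) shift amount, which lives in bits [11:7] or [11:8] respectively.
 * E.g. SHIFT_IMM_LSR(16) == (16 << 7) | 0x20, i.e. "lsr #16".
 */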
318 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
319 #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
320 #define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
321 #define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
322 #define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
323 #define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
324 #define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
325 #define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
326
327 enum arm_cond_code_e {
328 COND_EQ = 0x0,
329 COND_NE = 0x1,
330 COND_CS = 0x2, /* Unsigned greater or equal */
331 COND_CC = 0x3, /* Unsigned less than */
332 COND_MI = 0x4, /* Negative */
333 COND_PL = 0x5, /* Zero or greater */
334 COND_VS = 0x6, /* Overflow */
335 COND_VC = 0x7, /* No overflow */
336 COND_HI = 0x8, /* Unsigned greater than */
337 COND_LS = 0x9, /* Unsigned less or equal */
338 COND_GE = 0xa,
339 COND_LT = 0xb,
340 COND_GT = 0xc,
341 COND_LE = 0xd,
342 COND_AL = 0xe,
343 };
344
345 static const uint8_t tcg_cond_to_arm_cond[] = {
346 [TCG_COND_EQ] = COND_EQ,
347 [TCG_COND_NE] = COND_NE,
348 [TCG_COND_LT] = COND_LT,
349 [TCG_COND_GE] = COND_GE,
350 [TCG_COND_LE] = COND_LE,
351 [TCG_COND_GT] = COND_GT,
352 /* unsigned */
353 [TCG_COND_LTU] = COND_CC,
354 [TCG_COND_GEU] = COND_CS,
355 [TCG_COND_LEU] = COND_LS,
356 [TCG_COND_GTU] = COND_HI,
357 };
358
359 static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
360 {
361 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
362 }
363
364 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
365 {
366 tcg_out32(s, (cond << 28) | 0x0a000000 |
367 (((offset - 8) >> 2) & 0x00ffffff));
368 }
369
370 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
371 {
372 /* We take care here not to modify the branch target by masking
373 the corresponding bytes. This ensures that caches and memory are
374 kept coherent during retranslation. */
375 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
376 }
377
378 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
379 {
380 /* We take care here not to modify the branch target by masking
381 the corresponding bytes. This ensures that caches and memory are
382 kept coherent during retranslation. */
383 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
384 }
385
386 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
387 {
388 tcg_out32(s, (cond << 28) | 0x0b000000 |
389 (((offset - 8) >> 2) & 0x00ffffff));
390 }
391
392 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
393 {
394 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
395 }
396
397 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
398 {
399 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
400 (((offset - 8) >> 2) & 0x00ffffff));
401 }
402
403 static inline void tcg_out_dat_reg(TCGContext *s,
404 int cond, int opc, int rd, int rn, int rm, int shift)
405 {
406 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
407 (rn << 16) | (rd << 12) | shift | rm);
408 }
409
410 static inline void tcg_out_nop(TCGContext *s)
411 {
412 if (use_armv7_instructions) {
413 /* Architected nop introduced in v6k. */
414 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
415 also Just So Happened to do nothing on pre-v6k so that we
416 don't need to conditionalize it? */
417 tcg_out32(s, 0xe320f000);
418 } else {
419 /* Prior to that, nop assembles to mov r0, r0. */
420 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
421 }
422 }
423
424 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
425 {
426 /* Simple reg-reg move, optimising out the 'do nothing' case */
427 if (rd != rm) {
428 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
429 }
430 }
431
432 static inline void tcg_out_dat_imm(TCGContext *s,
433 int cond, int opc, int rd, int rn, int im)
434 {
435 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
436 (rn << 16) | (rd << 12) | im);
437 }
438
439 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
440 {
441 int rot, opc, rn;
442
443 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
444 Speed things up by only checking when movt would be required.
445 Prior to armv7, have one go at fully rotated immediates before
446 doing the decomposition thing below. */
447 if (!use_armv7_instructions || (arg & 0xffff0000)) {
448 rot = encode_imm(arg);
449 if (rot >= 0) {
450 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
451 rotl(arg, rot) | (rot << 7));
452 return;
453 }
454 rot = encode_imm(~arg);
455 if (rot >= 0) {
456 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
457 rotl(~arg, rot) | (rot << 7));
458 return;
459 }
460 }
461
462 /* Use movw + movt. */
463 if (use_armv7_instructions) {
464 /* movw */
465 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
466 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
467 if (arg & 0xffff0000) {
468 /* movt */
469 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
470 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
471 }
472 return;
473 }
474
475 /* TODO: This is very suboptimal, we can easily have a constant
476 pool somewhere after all the instructions. */
477 opc = ARITH_MOV;
478 rn = 0;
479 /* If we have lots of leading 1's, we can shorten the sequence by
480 beginning with mvn and then clearing higher bits with eor. */
481 if (clz32(~arg) > clz32(arg)) {
482 opc = ARITH_MVN, arg = ~arg;
483 }
484 do {
485 int i = ctz32(arg) & ~1;
486 rot = ((32 - i) << 7) & 0xf00;
487 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
488 arg &= ~(0xff << i);
489
490 opc = ARITH_EOR;
491 rn = rd;
492 } while (arg);
493 }
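
/* For illustration: on ARMv7 the constant 0x12345678 is emitted as
 *     movw rd, #0x5678 ; movt rd, #0x1234
 * while the pre-v7 fallback above uses one mov (or mvn) plus up to three
 * eor insns, each clearing one rotated byte of the remaining value.
 */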
494
495 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
496 TCGArg lhs, TCGArg rhs, int rhs_is_const)
497 {
498 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
499 * rhs must satisfy the "rI" constraint.
500 */
501 if (rhs_is_const) {
502 int rot = encode_imm(rhs);
503 tcg_debug_assert(rot >= 0);
504 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
505 } else {
506 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
507 }
508 }
509
510 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
511 TCGReg dst, TCGReg lhs, TCGArg rhs,
512 bool rhs_is_const)
513 {
514 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
515 * rhs must satisfy the "rIK" constraint.
516 */
517 if (rhs_is_const) {
518 int rot = encode_imm(rhs);
519 if (rot < 0) {
520 rhs = ~rhs;
521 rot = encode_imm(rhs);
522 tcg_debug_assert(rot >= 0);
523 opc = opinv;
524 }
525 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
526 } else {
527 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
528 }
529 }
530
531 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
532 TCGArg dst, TCGArg lhs, TCGArg rhs,
533 bool rhs_is_const)
534 {
535 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
536 * rhs must satisfy the "rIN" constraint.
537 */
538 if (rhs_is_const) {
539 int rot = encode_imm(rhs);
540 if (rot < 0) {
541 rhs = -rhs;
542 rot = encode_imm(rhs);
543 tcg_debug_assert(rot >= 0);
544 opc = opneg;
545 }
546 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
547 } else {
548 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
549 }
550 }
551
552 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
553 TCGReg rn, TCGReg rm)
554 {
555 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
556 if (!use_armv6_instructions && rd == rn) {
557 if (rd == rm) {
558 /* rd == rn == rm; copy an input to tmp first. */
559 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
560 rm = rn = TCG_REG_TMP;
561 } else {
562 rn = rm;
563 rm = rd;
564 }
565 }
566 /* mul */
567 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
568 }
569
570 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
571 TCGReg rd1, TCGReg rn, TCGReg rm)
572 {
573 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
574 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
575 if (rd0 == rm || rd1 == rm) {
576 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
577 rn = TCG_REG_TMP;
578 } else {
579 TCGReg t = rn;
580 rn = rm;
581 rm = t;
582 }
583 }
584 /* umull */
585 tcg_out32(s, (cond << 28) | 0x00800090 |
586 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
587 }
588
589 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
590 TCGReg rd1, TCGReg rn, TCGReg rm)
591 {
592 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
593 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
594 if (rd0 == rm || rd1 == rm) {
595 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
596 rn = TCG_REG_TMP;
597 } else {
598 TCGReg t = rn;
599 rn = rm;
600 rm = t;
601 }
602 }
603 /* smull */
604 tcg_out32(s, (cond << 28) | 0x00c00090 |
605 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
606 }
607
608 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
609 {
610 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
611 }
612
613 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
614 {
615 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
616 }
617
618 static inline void tcg_out_ext8s(TCGContext *s, int cond,
619 int rd, int rn)
620 {
621 if (use_armv6_instructions) {
622 /* sxtb */
623 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
624 } else {
625 tcg_out_dat_reg(s, cond, ARITH_MOV,
626 rd, 0, rn, SHIFT_IMM_LSL(24));
627 tcg_out_dat_reg(s, cond, ARITH_MOV,
628 rd, 0, rd, SHIFT_IMM_ASR(24));
629 }
630 }
631
632 static inline void tcg_out_ext8u(TCGContext *s, int cond,
633 int rd, int rn)
634 {
635 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
636 }
637
638 static inline void tcg_out_ext16s(TCGContext *s, int cond,
639 int rd, int rn)
640 {
641 if (use_armv6_instructions) {
642 /* sxth */
643 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
644 } else {
645 tcg_out_dat_reg(s, cond, ARITH_MOV,
646 rd, 0, rn, SHIFT_IMM_LSL(16));
647 tcg_out_dat_reg(s, cond, ARITH_MOV,
648 rd, 0, rd, SHIFT_IMM_ASR(16));
649 }
650 }
651
652 static inline void tcg_out_ext16u(TCGContext *s, int cond,
653 int rd, int rn)
654 {
655 if (use_armv6_instructions) {
656 /* uxth */
657 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
658 } else {
659 tcg_out_dat_reg(s, cond, ARITH_MOV,
660 rd, 0, rn, SHIFT_IMM_LSL(16));
661 tcg_out_dat_reg(s, cond, ARITH_MOV,
662 rd, 0, rd, SHIFT_IMM_LSR(16));
663 }
664 }
665
666 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
667 {
668 if (use_armv6_instructions) {
669 /* revsh */
670 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
671 } else {
672 tcg_out_dat_reg(s, cond, ARITH_MOV,
673 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
674 tcg_out_dat_reg(s, cond, ARITH_MOV,
675 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
676 tcg_out_dat_reg(s, cond, ARITH_ORR,
677 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
678 }
679 }
680
681 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
682 {
683 if (use_armv6_instructions) {
684 /* rev16 */
685 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
686 } else {
687 tcg_out_dat_reg(s, cond, ARITH_MOV,
688 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
689 tcg_out_dat_reg(s, cond, ARITH_MOV,
690 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
691 tcg_out_dat_reg(s, cond, ARITH_ORR,
692 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
693 }
694 }
695
696 /* Swap the two low bytes, assuming that the two high input bytes and the
697 two high output bytes can hold any value. */
698 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
699 {
700 if (use_armv6_instructions) {
701 /* rev16 */
702 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
703 } else {
704 tcg_out_dat_reg(s, cond, ARITH_MOV,
705 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
706 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
707 tcg_out_dat_reg(s, cond, ARITH_ORR,
708 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
709 }
710 }
711
712 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
713 {
714 if (use_armv6_instructions) {
715 /* rev */
716 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
717 } else {
718 tcg_out_dat_reg(s, cond, ARITH_EOR,
719 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
720 tcg_out_dat_imm(s, cond, ARITH_BIC,
721 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
722 tcg_out_dat_reg(s, cond, ARITH_MOV,
723 rd, 0, rn, SHIFT_IMM_ROR(8));
724 tcg_out_dat_reg(s, cond, ARITH_EOR,
725 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
726 }
727 }
728
729 bool tcg_target_deposit_valid(int ofs, int len)
730 {
731 /* ??? Without bfi, we could improve over generic code by combining
732 the right-shift from a non-zero ofs with the orr. We do run into
733 problems when rd == rs, and the mask generated from ofs+len doesn't
734 fit into an immediate. We would have to be careful not to pessimize
735 wrt the optimizations performed on the expanded code. */
736 return use_armv7_instructions;
737 }
738
739 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
740 TCGArg a1, int ofs, int len, bool const_a1)
741 {
742 if (const_a1) {
743 /* bfi becomes bfc with rn == 15. */
744 a1 = 15;
745 }
746 /* bfi/bfc */
747 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
748 | (ofs << 7) | ((ofs + len - 1) << 16));
749 }
750
751 /* Note that this routine is used for both LDR and LDRH formats, so we do
752 not wish to include an immediate shift at this point. */
753 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
754 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
755 {
756 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
757 | (w << 21) | (rn << 16) | (rt << 12) | rm);
758 }
759
760 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
761 TCGReg rn, int imm8, bool p, bool w)
762 {
763 bool u = 1;
764 if (imm8 < 0) {
765 imm8 = -imm8;
766 u = 0;
767 }
768 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
769 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
770 }
771
772 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
773 TCGReg rn, int imm12, bool p, bool w)
774 {
775 bool u = 1;
776 if (imm12 < 0) {
777 imm12 = -imm12;
778 u = 0;
779 }
780 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
781 (rn << 16) | (rt << 12) | imm12);
782 }
783
784 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
785 TCGReg rn, int imm12)
786 {
787 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
788 }
789
790 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
791 TCGReg rn, int imm12)
792 {
793 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
794 }
795
796 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
797 TCGReg rn, TCGReg rm)
798 {
799 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
800 }
801
802 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
803 TCGReg rn, TCGReg rm)
804 {
805 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
806 }
807
808 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
809 TCGReg rn, int imm8)
810 {
811 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
812 }
813
814 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
815 TCGReg rn, TCGReg rm)
816 {
817 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
818 }
819
820 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
821 TCGReg rn, int imm8)
822 {
823 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
824 }
825
826 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
827 TCGReg rn, TCGReg rm)
828 {
829 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
830 }
831
832 /* Register pre-increment with base writeback. */
833 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
834 TCGReg rn, TCGReg rm)
835 {
836 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
837 }
838
839 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
840 TCGReg rn, TCGReg rm)
841 {
842 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
843 }
844
845 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
846 TCGReg rn, int imm8)
847 {
848 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
849 }
850
851 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
852 TCGReg rn, int imm8)
853 {
854 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
855 }
856
857 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
858 TCGReg rn, TCGReg rm)
859 {
860 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
861 }
862
863 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
864 TCGReg rn, TCGReg rm)
865 {
866 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
867 }
868
869 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
870 TCGReg rn, int imm8)
871 {
872 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
873 }
874
875 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
876 TCGReg rn, TCGReg rm)
877 {
878 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
879 }
880
881 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
882 TCGReg rn, int imm12)
883 {
884 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
885 }
886
887 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
888 TCGReg rn, int imm12)
889 {
890 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
891 }
892
893 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
894 TCGReg rn, TCGReg rm)
895 {
896 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
897 }
898
899 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
900 TCGReg rn, TCGReg rm)
901 {
902 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
903 }
904
905 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
906 TCGReg rn, int imm8)
907 {
908 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
909 }
910
911 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
912 TCGReg rn, TCGReg rm)
913 {
914 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
915 }
916
917 static inline void tcg_out_ld32u(TCGContext *s, int cond,
918 int rd, int rn, int32_t offset)
919 {
920 if (offset > 0xfff || offset < -0xfff) {
921 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
922 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
923 } else
924 tcg_out_ld32_12(s, cond, rd, rn, offset);
925 }
926
927 static inline void tcg_out_st32(TCGContext *s, int cond,
928 int rd, int rn, int32_t offset)
929 {
930 if (offset > 0xfff || offset < -0xfff) {
931 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
932 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
933 } else
934 tcg_out_st32_12(s, cond, rd, rn, offset);
935 }
936
937 static inline void tcg_out_ld16u(TCGContext *s, int cond,
938 int rd, int rn, int32_t offset)
939 {
940 if (offset > 0xff || offset < -0xff) {
941 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
942 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
943 } else
944 tcg_out_ld16u_8(s, cond, rd, rn, offset);
945 }
946
947 static inline void tcg_out_ld16s(TCGContext *s, int cond,
948 int rd, int rn, int32_t offset)
949 {
950 if (offset > 0xff || offset < -0xff) {
951 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
952 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
953 } else
954 tcg_out_ld16s_8(s, cond, rd, rn, offset);
955 }
956
957 static inline void tcg_out_st16(TCGContext *s, int cond,
958 int rd, int rn, int32_t offset)
959 {
960 if (offset > 0xff || offset < -0xff) {
961 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
962 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
963 } else
964 tcg_out_st16_8(s, cond, rd, rn, offset);
965 }
966
967 static inline void tcg_out_ld8u(TCGContext *s, int cond,
968 int rd, int rn, int32_t offset)
969 {
970 if (offset > 0xfff || offset < -0xfff) {
971 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
972 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
973 } else
974 tcg_out_ld8_12(s, cond, rd, rn, offset);
975 }
976
977 static inline void tcg_out_ld8s(TCGContext *s, int cond,
978 int rd, int rn, int32_t offset)
979 {
980 if (offset > 0xff || offset < -0xff) {
981 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
982 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
983 } else
984 tcg_out_ld8s_8(s, cond, rd, rn, offset);
985 }
986
987 static inline void tcg_out_st8(TCGContext *s, int cond,
988 int rd, int rn, int32_t offset)
989 {
990 if (offset > 0xfff || offset < -0xfff) {
991 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
992 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
993 } else
994 tcg_out_st8_12(s, cond, rd, rn, offset);
995 }
996
997 /* The _goto case is normally between TBs within the same code buffer, and
998 * with the code buffer limited to 16MB we wouldn't need the long case.
999 * But we also use it for the tail-call to the qemu_ld/st helpers, which
1000 * does need the long case. */
1001 static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
1002 {
1003 intptr_t addri = (intptr_t)addr;
1004 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1005
1006 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
1007 tcg_out_b(s, cond, disp);
1008 return;
1009 }
1010
1011 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
1012 if (use_armv5t_instructions) {
1013 tcg_out_bx(s, cond, TCG_REG_TMP);
1014 } else {
1015 if (addri & 1) {
1016 tcg_abort();
1017 }
1018 tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
1019 }
1020 }
1021
1022 /* The call case is mostly used for helpers, so it's not unreasonable
1023 * for them to be beyond branch range. */
1024 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1025 {
1026 intptr_t addri = (intptr_t)addr;
1027 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1028
1029 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1030 if (addri & 1) {
1031 /* Use BLX if the target is in Thumb mode */
1032 if (!use_armv5t_instructions) {
1033 tcg_abort();
1034 }
1035 tcg_out_blx_imm(s, disp);
1036 } else {
1037 tcg_out_bl(s, COND_AL, disp);
1038 }
1039 } else if (use_armv7_instructions) {
1040 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1041 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1042 } else {
1043 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1044 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1045 tcg_out32(s, addri);
1046 }
1047 }
1048
1049 void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1050 {
1051 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
1052 tcg_insn_unit *target = (tcg_insn_unit *)addr;
1053
1054 /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
1055 reloc_pc24_atomic(code_ptr, target);
1056 flush_icache_range(jmp_addr, jmp_addr + 4);
1057 }
1058
1059 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1060 {
1061 if (l->has_value) {
1062 tcg_out_goto(s, cond, l->u.value_ptr);
1063 } else {
1064 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1065 tcg_out_b_noaddr(s, cond);
1066 }
1067 }
1068
1069 #ifdef CONFIG_SOFTMMU
1070 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1071 * int mmu_idx, uintptr_t ra)
1072 */
1073 static void * const qemu_ld_helpers[16] = {
1074 [MO_UB] = helper_ret_ldub_mmu,
1075 [MO_SB] = helper_ret_ldsb_mmu,
1076
1077 [MO_LEUW] = helper_le_lduw_mmu,
1078 [MO_LEUL] = helper_le_ldul_mmu,
1079 [MO_LEQ] = helper_le_ldq_mmu,
1080 [MO_LESW] = helper_le_ldsw_mmu,
1081 [MO_LESL] = helper_le_ldul_mmu,
1082
1083 [MO_BEUW] = helper_be_lduw_mmu,
1084 [MO_BEUL] = helper_be_ldul_mmu,
1085 [MO_BEQ] = helper_be_ldq_mmu,
1086 [MO_BESW] = helper_be_ldsw_mmu,
1087 [MO_BESL] = helper_be_ldul_mmu,
1088 };
1089
1090 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1091 * uintxx_t val, int mmu_idx, uintptr_t ra)
1092 */
1093 static void * const qemu_st_helpers[16] = {
1094 [MO_UB] = helper_ret_stb_mmu,
1095 [MO_LEUW] = helper_le_stw_mmu,
1096 [MO_LEUL] = helper_le_stl_mmu,
1097 [MO_LEQ] = helper_le_stq_mmu,
1098 [MO_BEUW] = helper_be_stw_mmu,
1099 [MO_BEUL] = helper_be_stl_mmu,
1100 [MO_BEQ] = helper_be_stq_mmu,
1101 };
1102
1103 /* Helper routines for marshalling helper function arguments into
1104 * the correct registers and stack.
1105 * argreg is where we want to put this argument, arg is the argument itself.
1106 * Return value is the updated argreg ready for the next call.
1107 * Note that argregs 0..3 are real registers; 4+ go on the stack.
1108 *
1109 * We provide routines for arguments which are: immediate, 32 bit
1110 * value in register, 16 and 8 bit values in register (which must be zero
1111 * extended before use) and 64 bit value in a lo:hi register pair.
1112 */
1113 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
1114 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
1115 { \
1116 if (argreg < 4) { \
1117 MOV_ARG(s, COND_AL, argreg, arg); \
1118 } else { \
1119 int ofs = (argreg - 4) * 4; \
1120 EXT_ARG; \
1121 tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
1122 tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
1123 } \
1124 return argreg + 1; \
1125 }
1126
1127 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1128 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1129 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1130 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1131 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1132 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1133 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
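
/* Note that when an argument spills to the stack, EXT_ARG first materializes
 * it in a register: e.g. tcg_out_arg_imm32 loads the immediate into
 * TCG_REG_TMP and rebinds 'arg', so the stack store in the macro above
 * writes the register rather than the raw constant.
 */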
1134
1135 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1136 TCGReg arglo, TCGReg arghi)
1137 {
1138 /* 64 bit arguments must go in even/odd register pairs
1139 * and in 8-aligned stack slots.
1140 */
1141 if (argreg & 1) {
1142 argreg++;
1143 }
1144 if (use_armv6_instructions && argreg >= 4
1145 && (arglo & 1) == 0 && arghi == arglo + 1) {
1146 tcg_out_strd_8(s, COND_AL, arglo,
1147 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1148 return argreg + 2;
1149 } else {
1150 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1151 argreg = tcg_out_arg_reg32(s, argreg, arghi);
1152 return argreg;
1153 }
1154 }
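
/* As an example, with TARGET_LONG_BITS == 64 the load slow path below ends
 * up with env in r0, r1 skipped for alignment, the guest address in the
 * r2:r3 pair, and the TCGMemOpIdx and return address in the stack slots at
 * offsets 0 and 4.
 */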
1155
1156 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1157
1158 /* We're expecting to use an 8-bit immediate and to mask. */
1159 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1160
1161 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1162 Using the offset of the second entry in the last tlb table ensures
1163 that we can index all of the elements of the first entry. */
1164 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1165 > 0xffff);
1166
1167 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1168 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1169
1170 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1171 TCGMemOp opc, int mem_index, bool is_load)
1172 {
1173 TCGReg base = TCG_AREG0;
1174 int cmp_off =
1175 (is_load
1176 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1177 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1178 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1179 unsigned s_bits = opc & MO_SIZE;
1180 unsigned a_bits = get_alignment_bits(opc);
1181
1182 /* Should generate something like the following:
1183 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1184 * add r2, env, #high
1185 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1186 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1187 * ldr r0, [r2, #cmp] (4)
1188 * tst addrlo, #s_mask
1189 * ldr r2, [r2, #add] (5)
1190 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1191 */
1192 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1193 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1194
1195 /* We checked that the offset is contained within 16 bits above. */
1196 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1197 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1198 (24 << 7) | (cmp_off >> 8));
1199 base = TCG_REG_R2;
1200 add_off -= cmp_off & 0xff00;
1201 cmp_off &= 0xff;
1202 }
1203
1204 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1205 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1206 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1207 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1208
1209 /* Load the tlb comparator. Use ldrd if needed and available,
1210 but due to how the pointer needs setting up, ldm isn't useful.
1211 Base arm5 doesn't have ldrd, but armv5te does. */
1212 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1213 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1214 } else {
1215 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1216 if (TARGET_LONG_BITS == 64) {
1217 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1218 }
1219 }
1220
1221 /* Check alignment. We don't support inline unaligned accesses,
1222 but we can easily support overalignment checks. */
1223 if (a_bits < s_bits) {
1224 a_bits = s_bits;
1225 }
1226 if (a_bits) {
1227 tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
1228 }
1229
1230 /* Load the tlb addend. */
1231 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1232
1233 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1234 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1235
1236 if (TARGET_LONG_BITS == 64) {
1237 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1238 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
1239 }
1240
1241 return TCG_REG_R2;
1242 }
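
/* Note how the alignment tst and the page-tag cmp above share the flags:
 * for anything wider than a byte, an unaligned address leaves Z clear, the
 * cmpeq is skipped, and the callers' conditional branch (COND_NE) then
 * sends the access to the slow path exactly as a TLB miss would.
 */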
1243
1244 /* Record the context of a call to the out of line helper code for the slow
1245 path for a load or store, so that we can later generate the correct
1246 helper code. */
1247 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1248 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1249 TCGReg addrhi, tcg_insn_unit *raddr,
1250 tcg_insn_unit *label_ptr)
1251 {
1252 TCGLabelQemuLdst *label = new_ldst_label(s);
1253
1254 label->is_ld = is_ld;
1255 label->oi = oi;
1256 label->datalo_reg = datalo;
1257 label->datahi_reg = datahi;
1258 label->addrlo_reg = addrlo;
1259 label->addrhi_reg = addrhi;
1260 label->raddr = raddr;
1261 label->label_ptr[0] = label_ptr;
1262 }
1263
1264 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1265 {
1266 TCGReg argreg, datalo, datahi;
1267 TCGMemOpIdx oi = lb->oi;
1268 TCGMemOp opc = get_memop(oi);
1269 void *func;
1270
1271 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1272
1273 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1274 if (TARGET_LONG_BITS == 64) {
1275 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1276 } else {
1277 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1278 }
1279 argreg = tcg_out_arg_imm32(s, argreg, oi);
1280 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1281
1282 /* For armv6 we can use the canonical unsigned helpers and minimize
1283 icache usage. For pre-armv6, use the signed helpers since we do
1284 not have a single insn sign-extend. */
1285 if (use_armv6_instructions) {
1286 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1287 } else {
1288 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1289 if (opc & MO_SIGN) {
1290 opc = MO_UL;
1291 }
1292 }
1293 tcg_out_call(s, func);
1294
1295 datalo = lb->datalo_reg;
1296 datahi = lb->datahi_reg;
1297 switch (opc & MO_SSIZE) {
1298 case MO_SB:
1299 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1300 break;
1301 case MO_SW:
1302 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1303 break;
1304 default:
1305 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1306 break;
1307 case MO_Q:
1308 if (datalo != TCG_REG_R1) {
1309 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1310 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1311 } else if (datahi != TCG_REG_R0) {
1312 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1313 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1314 } else {
1315 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1316 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1317 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1318 }
1319 break;
1320 }
1321
1322 tcg_out_goto(s, COND_AL, lb->raddr);
1323 }
1324
1325 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1326 {
1327 TCGReg argreg, datalo, datahi;
1328 TCGMemOpIdx oi = lb->oi;
1329 TCGMemOp opc = get_memop(oi);
1330
1331 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1332
1333 argreg = TCG_REG_R0;
1334 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1335 if (TARGET_LONG_BITS == 64) {
1336 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1337 } else {
1338 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1339 }
1340
1341 datalo = lb->datalo_reg;
1342 datahi = lb->datahi_reg;
1343 switch (opc & MO_SIZE) {
1344 case MO_8:
1345 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1346 break;
1347 case MO_16:
1348 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1349 break;
1350 case MO_32:
1351 default:
1352 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1353 break;
1354 case MO_64:
1355 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1356 break;
1357 }
1358
1359 argreg = tcg_out_arg_imm32(s, argreg, oi);
1360 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1361
1362 /* Tail-call to the helper, which will return to the fast path. */
1363 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1364 }
1365 #endif /* SOFTMMU */
1366
1367 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1368 TCGReg datalo, TCGReg datahi,
1369 TCGReg addrlo, TCGReg addend)
1370 {
1371 TCGMemOp bswap = opc & MO_BSWAP;
1372
1373 switch (opc & MO_SSIZE) {
1374 case MO_UB:
1375 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1376 break;
1377 case MO_SB:
1378 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1379 break;
1380 case MO_UW:
1381 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1382 if (bswap) {
1383 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1384 }
1385 break;
1386 case MO_SW:
1387 if (bswap) {
1388 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1389 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1390 } else {
1391 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1392 }
1393 break;
1394 case MO_UL:
1395 default:
1396 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1397 if (bswap) {
1398 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1399 }
1400 break;
1401 case MO_Q:
1402 {
1403 TCGReg dl = (bswap ? datahi : datalo);
1404 TCGReg dh = (bswap ? datalo : datahi);
1405
1406 /* Avoid ldrd for user-only emulation, to handle unaligned accesses. */
1407 if (USING_SOFTMMU && use_armv6_instructions
1408 && (dl & 1) == 0 && dh == dl + 1) {
1409 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1410 } else if (dl != addend) {
1411 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1412 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1413 } else {
1414 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1415 addend, addrlo, SHIFT_IMM_LSL(0));
1416 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1417 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1418 }
1419 if (bswap) {
1420 tcg_out_bswap32(s, COND_AL, dl, dl);
1421 tcg_out_bswap32(s, COND_AL, dh, dh);
1422 }
1423 }
1424 break;
1425 }
1426 }
1427
1428 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1429 TCGReg datalo, TCGReg datahi,
1430 TCGReg addrlo)
1431 {
1432 TCGMemOp bswap = opc & MO_BSWAP;
1433
1434 switch (opc & MO_SSIZE) {
1435 case MO_UB:
1436 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1437 break;
1438 case MO_SB:
1439 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1440 break;
1441 case MO_UW:
1442 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1443 if (bswap) {
1444 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1445 }
1446 break;
1447 case MO_SW:
1448 if (bswap) {
1449 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1450 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1451 } else {
1452 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1453 }
1454 break;
1455 case MO_UL:
1456 default:
1457 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1458 if (bswap) {
1459 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1460 }
1461 break;
1462 case MO_Q:
1463 {
1464 TCGReg dl = (bswap ? datahi : datalo);
1465 TCGReg dh = (bswap ? datalo : datahi);
1466
1467 /* Avoid ldrd for user-only emulation, to handle unaligned accesses. */
1468 if (USING_SOFTMMU && use_armv6_instructions
1469 && (dl & 1) == 0 && dh == dl + 1) {
1470 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1471 } else if (dl == addrlo) {
1472 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1473 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1474 } else {
1475 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1476 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1477 }
1478 if (bswap) {
1479 tcg_out_bswap32(s, COND_AL, dl, dl);
1480 tcg_out_bswap32(s, COND_AL, dh, dh);
1481 }
1482 }
1483 break;
1484 }
1485 }
1486
1487 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1488 {
1489 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1490 TCGMemOpIdx oi;
1491 TCGMemOp opc;
1492 #ifdef CONFIG_SOFTMMU
1493 int mem_index;
1494 TCGReg addend;
1495 tcg_insn_unit *label_ptr;
1496 #endif
1497
1498 datalo = *args++;
1499 datahi = (is64 ? *args++ : 0);
1500 addrlo = *args++;
1501 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1502 oi = *args++;
1503 opc = get_memop(oi);
1504
1505 #ifdef CONFIG_SOFTMMU
1506 mem_index = get_mmuidx(oi);
1507 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
1508
1509 /* This is a conditional BL, used only to load a pointer within this opcode
1510 into LR for the slow path. We will not be using the value for a tail call. */
1511 label_ptr = s->code_ptr;
1512 tcg_out_bl_noaddr(s, COND_NE);
1513
1514 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1515
1516 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1517 s->code_ptr, label_ptr);
1518 #else /* !CONFIG_SOFTMMU */
1519 if (guest_base) {
1520 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1521 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1522 } else {
1523 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1524 }
1525 #endif
1526 }
1527
1528 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1529 TCGReg datalo, TCGReg datahi,
1530 TCGReg addrlo, TCGReg addend)
1531 {
1532 TCGMemOp bswap = opc & MO_BSWAP;
1533
1534 switch (opc & MO_SIZE) {
1535 case MO_8:
1536 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1537 break;
1538 case MO_16:
1539 if (bswap) {
1540 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1541 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1542 } else {
1543 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1544 }
1545 break;
1546 case MO_32:
1547 default:
1548 if (bswap) {
1549 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1550 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1551 } else {
1552 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1553 }
1554 break;
1555 case MO_64:
1556 /* Avoid strd for user-only emulation, to handle unaligned accesses. */
1557 if (bswap) {
1558 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1559 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1560 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1561 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1562 } else if (USING_SOFTMMU && use_armv6_instructions
1563 && (datalo & 1) == 0 && datahi == datalo + 1) {
1564 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1565 } else {
1566 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1567 tcg_out_st32_12(s, cond, datahi, addend, 4);
1568 }
1569 break;
1570 }
1571 }
1572
1573 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1574 TCGReg datalo, TCGReg datahi,
1575 TCGReg addrlo)
1576 {
1577 TCGMemOp bswap = opc & MO_BSWAP;
1578
1579 switch (opc & MO_SIZE) {
1580 case MO_8:
1581 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1582 break;
1583 case MO_16:
1584 if (bswap) {
1585 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1586 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1587 } else {
1588 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1589 }
1590 break;
1591 case MO_32:
1592 default:
1593 if (bswap) {
1594 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1595 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1596 } else {
1597 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1598 }
1599 break;
1600 case MO_64:
1601 /* Avoid strd for user-only emulation, to handle unaligned accesses. */
1602 if (bswap) {
1603 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1604 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1605 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1606 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1607 } else if (USING_SOFTMMU && use_armv6_instructions
1608 && (datalo & 1) == 0 && datahi == datalo + 1) {
1609 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1610 } else {
1611 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1612 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1613 }
1614 break;
1615 }
1616 }
1617
1618 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1619 {
1620 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1621 TCGMemOpIdx oi;
1622 TCGMemOp opc;
1623 #ifdef CONFIG_SOFTMMU
1624 int mem_index;
1625 TCGReg addend;
1626 tcg_insn_unit *label_ptr;
1627 #endif
1628
1629 datalo = *args++;
1630 datahi = (is64 ? *args++ : 0);
1631 addrlo = *args++;
1632 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1633 oi = *args++;
1634 opc = get_memop(oi);
1635
1636 #ifdef CONFIG_SOFTMMU
1637 mem_index = get_mmuidx(oi);
1638 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1639
1640 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1641
1642 /* The conditional call must come last, as we're going to return here. */
1643 label_ptr = s->code_ptr;
1644 tcg_out_bl_noaddr(s, COND_NE);
1645
1646 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1647 s->code_ptr, label_ptr);
1648 #else /* !CONFIG_SOFTMMU */
1649 if (guest_base) {
1650 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1651 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1652 datahi, addrlo, TCG_REG_TMP);
1653 } else {
1654 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1655 }
1656 #endif
1657 }
1658
1659 static tcg_insn_unit *tb_ret_addr;
1660
1661 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1662 const TCGArg *args, const int *const_args)
1663 {
1664 TCGArg a0, a1, a2, a3, a4, a5;
1665 int c;
1666
1667 switch (opc) {
1668 case INDEX_op_exit_tb:
1669 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1670 tcg_out_goto(s, COND_AL, tb_ret_addr);
1671 break;
1672 case INDEX_op_goto_tb:
1673 if (s->tb_jmp_insn_offset) {
1674 /* Direct jump method */
1675 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1676 tcg_out_b_noaddr(s, COND_AL);
1677 } else {
1678 /* Indirect jump method */
1679 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1680 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1681 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
1682 }
1683 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1684 break;
1685 case INDEX_op_br:
1686 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1687 break;
1688
1689 case INDEX_op_ld8u_i32:
1690 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1691 break;
1692 case INDEX_op_ld8s_i32:
1693 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1694 break;
1695 case INDEX_op_ld16u_i32:
1696 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1697 break;
1698 case INDEX_op_ld16s_i32:
1699 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1700 break;
1701 case INDEX_op_ld_i32:
1702 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1703 break;
1704 case INDEX_op_st8_i32:
1705 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1706 break;
1707 case INDEX_op_st16_i32:
1708 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1709 break;
1710 case INDEX_op_st_i32:
1711 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1712 break;
1713
1714 case INDEX_op_movcond_i32:
1715 /* Constraints mean that v2 is always in the same register as dest,
1716 * so we only need to do "if condition passed, move v1 to dest".
1717 */
1718 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1719 args[1], args[2], const_args[2]);
1720 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1721 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1722 break;
1723 case INDEX_op_add_i32:
1724 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1725 args[0], args[1], args[2], const_args[2]);
1726 break;
1727 case INDEX_op_sub_i32:
1728 if (const_args[1]) {
1729 if (const_args[2]) {
1730 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1731 } else {
1732 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1733 args[0], args[2], args[1], 1);
1734 }
1735 } else {
1736 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1737 args[0], args[1], args[2], const_args[2]);
1738 }
1739 break;
1740 case INDEX_op_and_i32:
1741 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1742 args[0], args[1], args[2], const_args[2]);
1743 break;
1744 case INDEX_op_andc_i32:
1745 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1746 args[0], args[1], args[2], const_args[2]);
1747 break;
1748 case INDEX_op_or_i32:
1749 c = ARITH_ORR;
1750 goto gen_arith;
1751 case INDEX_op_xor_i32:
1752 c = ARITH_EOR;
1753 /* Fall through. */
1754 gen_arith:
1755 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1756 break;
1757 case INDEX_op_add2_i32:
1758 a0 = args[0], a1 = args[1], a2 = args[2];
1759 a3 = args[3], a4 = args[4], a5 = args[5];
1760 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1761 a0 = TCG_REG_TMP;
1762 }
1763 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1764 a0, a2, a4, const_args[4]);
1765 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1766 a1, a3, a5, const_args[5]);
1767 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1768 break;
1769 case INDEX_op_sub2_i32:
1770 a0 = args[0], a1 = args[1], a2 = args[2];
1771 a3 = args[3], a4 = args[4], a5 = args[5];
1772 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1773 a0 = TCG_REG_TMP;
1774 }
1775 if (const_args[2]) {
1776 if (const_args[4]) {
1777 tcg_out_movi32(s, COND_AL, a0, a4);
1778 a4 = a0;
1779 }
1780 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1781 } else {
1782 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1783 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1784 }
1785 if (const_args[3]) {
1786 if (const_args[5]) {
1787 tcg_out_movi32(s, COND_AL, a1, a5);
1788 a5 = a1;
1789 }
1790 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1791 } else {
1792 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1793 a1, a3, a5, const_args[5]);
1794 }
1795 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1796 break;
1797 case INDEX_op_neg_i32:
1798 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1799 break;
1800 case INDEX_op_not_i32:
1801 tcg_out_dat_reg(s, COND_AL,
1802 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1803 break;
1804 case INDEX_op_mul_i32:
1805 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1806 break;
1807 case INDEX_op_mulu2_i32:
1808 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1809 break;
1810 case INDEX_op_muls2_i32:
1811 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1812 break;
1813 /* XXX: Perhaps args[2] & 0x1f is wrong */
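/* For LSR, ASR and ROR an immediate count of 0 encodes a shift by 32 (or
   RRX), so a constant count of zero is emitted as LSL #0, i.e. a plain
   move; LSL itself needs no such special case. */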
1814 case INDEX_op_shl_i32:
1815 c = const_args[2] ?
1816 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1817 goto gen_shift32;
1818 case INDEX_op_shr_i32:
1819 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1820 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1821 goto gen_shift32;
1822 case INDEX_op_sar_i32:
1823 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1824 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1825 goto gen_shift32;
1826 case INDEX_op_rotr_i32:
1827 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1828 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1829 /* Fall through. */
1830 gen_shift32:
1831 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1832 break;
1833
1834 case INDEX_op_rotl_i32:
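/* rotl is implemented as rotr by (32 - count).  For the register case the
   rsb below computes that complement into TCG_REG_TMP; a rotate-by-register
   amount of 32 leaves the value unchanged, so a zero count still works. */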
1835 if (const_args[2]) {
1836 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1837 ((0x20 - args[2]) & 0x1f) ?
1838 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1839 SHIFT_IMM_LSL(0));
1840 } else {
1841 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1842 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1843 SHIFT_REG_ROR(TCG_REG_TMP));
1844 }
1845 break;
1846
1847 case INDEX_op_brcond_i32:
1848 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1849 args[0], args[1], const_args[1]);
1850 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1851 arg_label(args[3]));
1852 break;
1853 case INDEX_op_brcond2_i32:
1854 /* The resulting conditions are:
1855 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1856 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1857 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1858 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1859 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1860 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1861 */
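/* For example, for TCG_COND_LTU this emits roughly:
 *    cmp   a1, a3
 *    cmpeq a0, a2
 *    blo   label
 */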
1862 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1863 args[1], args[3], const_args[3]);
1864 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1865 args[0], args[2], const_args[2]);
1866 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
1867 arg_label(args[5]));
1868 break;
1869 case INDEX_op_setcond_i32:
1870 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1871 args[1], args[2], const_args[2]);
1872 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1873 ARITH_MOV, args[0], 0, 1);
1874 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1875 ARITH_MOV, args[0], 0, 0);
1876 break;
1877 case INDEX_op_setcond2_i32:
1878 /* See brcond2_i32 comment */
1879 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1880 args[2], args[4], const_args[4]);
1881 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1882 args[1], args[3], const_args[3]);
1883 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1884 ARITH_MOV, args[0], 0, 1);
1885 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1886 ARITH_MOV, args[0], 0, 0);
1887 break;
1888
1889 case INDEX_op_qemu_ld_i32:
1890 tcg_out_qemu_ld(s, args, 0);
1891 break;
1892 case INDEX_op_qemu_ld_i64:
1893 tcg_out_qemu_ld(s, args, 1);
1894 break;
1895 case INDEX_op_qemu_st_i32:
1896 tcg_out_qemu_st(s, args, 0);
1897 break;
1898 case INDEX_op_qemu_st_i64:
1899 tcg_out_qemu_st(s, args, 1);
1900 break;
1901
1902 case INDEX_op_bswap16_i32:
1903 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1904 break;
1905 case INDEX_op_bswap32_i32:
1906 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1907 break;
1908
1909 case INDEX_op_ext8s_i32:
1910 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1911 break;
1912 case INDEX_op_ext16s_i32:
1913 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1914 break;
1915 case INDEX_op_ext16u_i32:
1916 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1917 break;
1918
1919 case INDEX_op_deposit_i32:
1920 tcg_out_deposit(s, COND_AL, args[0], args[2],
1921 args[3], args[4], const_args[2]);
1922 break;
1923
1924 case INDEX_op_div_i32:
1925 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1926 break;
1927 case INDEX_op_divu_i32:
1928 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1929 break;
1930
1931 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1932 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1933 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1934 default:
1935 tcg_abort();
1936 }
1937 }
1938
1939 static const TCGTargetOpDef arm_op_defs[] = {
1940 { INDEX_op_exit_tb, { } },
1941 { INDEX_op_goto_tb, { } },
1942 { INDEX_op_br, { } },
1943
1944 { INDEX_op_ld8u_i32, { "r", "r" } },
1945 { INDEX_op_ld8s_i32, { "r", "r" } },
1946 { INDEX_op_ld16u_i32, { "r", "r" } },
1947 { INDEX_op_ld16s_i32, { "r", "r" } },
1948 { INDEX_op_ld_i32, { "r", "r" } },
1949 { INDEX_op_st8_i32, { "r", "r" } },
1950 { INDEX_op_st16_i32, { "r", "r" } },
1951 { INDEX_op_st_i32, { "r", "r" } },
1952
1953 /* TODO: "r", "r", "ri" */
1954 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1955 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1956 { INDEX_op_mul_i32, { "r", "r", "r" } },
1957 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1958 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1959 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1960 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1961 { INDEX_op_or_i32, { "r", "r", "rI" } },
1962 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1963 { INDEX_op_neg_i32, { "r", "r" } },
1964 { INDEX_op_not_i32, { "r", "r" } },
1965
1966 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1967 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1968 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1969 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1970 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1971
1972 { INDEX_op_brcond_i32, { "r", "rIN" } },
1973 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
1974 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
1975
1976 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
1977 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
1978 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
1979 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
1980
1981 #if TARGET_LONG_BITS == 32
1982 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1983 { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
1984 { INDEX_op_qemu_st_i32, { "s", "s" } },
1985 { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
1986 #else
1987 { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
1988 { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
1989 { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
1990 { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
1991 #endif
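/* With TARGET_LONG_BITS == 64 the guest address needs a register pair,
   hence the extra "l"/"s" operand in the second set of constraints above. */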
1992
1993 { INDEX_op_bswap16_i32, { "r", "r" } },
1994 { INDEX_op_bswap32_i32, { "r", "r" } },
1995
1996 { INDEX_op_ext8s_i32, { "r", "r" } },
1997 { INDEX_op_ext16s_i32, { "r", "r" } },
1998 { INDEX_op_ext16u_i32, { "r", "r" } },
1999
2000 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2001
2002 { INDEX_op_div_i32, { "r", "r", "r" } },
2003 { INDEX_op_divu_i32, { "r", "r", "r" } },
2004
2005 { -1 },
2006 };
2007
2008 static void tcg_target_init(TCGContext *s)
2009 {
2010 /* Only probe for the platform and capabilities if we haven't already
2011 determined maximum values at compile time. */
2012 #ifndef use_idiv_instructions
2013 {
2014 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2015 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2016 }
2017 #endif
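/* If the architecture level was not fixed at compile time, refine it from
   AT_PLATFORM, which on Linux/ARM is a string such as "v7l" or "v6l"; the
   digit after the 'v' is the architecture version. */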
2018 if (__ARM_ARCH < 7) {
2019 const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2020 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2021 arm_arch = pl[1] - '0';
2022 }
2023 }
2024
2025 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2026 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2027 (1 << TCG_REG_R0) |
2028 (1 << TCG_REG_R1) |
2029 (1 << TCG_REG_R2) |
2030 (1 << TCG_REG_R3) |
2031 (1 << TCG_REG_R12) |
2032 (1 << TCG_REG_R14));
2033
2034 tcg_regset_clear(s->reserved_regs);
2035 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2036 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2037 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2038
2039 tcg_add_target_add_op_defs(arm_op_defs);
2040 }
2041
2042 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2043 TCGReg arg1, intptr_t arg2)
2044 {
2045 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2046 }
2047
2048 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2049 TCGReg arg1, intptr_t arg2)
2050 {
2051 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2052 }
2053
2054 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2055 TCGReg base, intptr_t ofs)
2056 {
2057 return false;
2058 }
2059
2060 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2061 TCGReg ret, TCGReg arg)
2062 {
2063 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2064 }
2065
2066 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2067 TCGReg ret, tcg_target_long arg)
2068 {
2069 tcg_out_movi32(s, COND_AL, ret, arg);
2070 }
2071
2072 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2073 and tcg_register_jit. */
2074
2075 #define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2076
2077 #define FRAME_SIZE \
2078 ((PUSH_SIZE \
2079 + TCG_STATIC_CALL_ARGS_SIZE \
2080 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2081 + TCG_TARGET_STACK_ALIGN - 1) \
2082 & -TCG_TARGET_STACK_ALIGN)
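/* PUSH_SIZE matches the prologue's stmdb: r4-r11 (eight registers) plus lr,
   i.e. 9 * sizeof(tcg_target_long) = 36 bytes on this 32-bit target.
   FRAME_SIZE adds the static call-argument area and the TCG temp buffer on
   top of that and rounds the total up to TCG_TARGET_STACK_ALIGN with the
   usual add-then-mask idiom. */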
2083
2084 static void tcg_target_qemu_prologue(TCGContext *s)
2085 {
2086 int stack_addend;
2087
2088 /* Calling convention requires us to save r4-r11 and lr. */
2089 /* stmdb sp!, { r4 - r11, lr } */
2090 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
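/* In 0x092d4ff0 the low halfword 0x4ff0 is the register mask: bits 4-11
   select r4-r11 and bit 14 selects lr. */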
2091
2092 /* Reserve callee argument and tcg temp space. */
2093 stack_addend = FRAME_SIZE - PUSH_SIZE;
2094
2095 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2096 TCG_REG_CALL_STACK, stack_addend, 1);
2097 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2098 CPU_TEMP_BUF_NLONGS * sizeof(long));
2099
2100 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2101
2102 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2103 tb_ret_addr = s->code_ptr;
2104
2105 /* Epilogue. We branch here via tb_ret_addr. */
2106 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2107 TCG_REG_CALL_STACK, stack_addend, 1);
2108
2109 /* ldmia sp!, { r4 - r11, pc } */
2110 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
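/* The register mask 0x8ff0 restores r4-r11 and loads the saved lr value
   directly into pc, so this one instruction also returns to our caller. */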
2111 }
2112
2113 typedef struct {
2114 DebugFrameHeader h;
2115 uint8_t fde_def_cfa[4];
2116 uint8_t fde_reg_ofs[18];
2117 } DebugFrame;
2118
2119 #define ELF_HOST_MACHINE EM_ARM
2120
2121 /* We're expecting a 2-byte uleb128-encoded value. */
2122 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
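/* uleb128 stores 7 bits per byte, least significant group first, with bit 7
   as a continuation flag.  fde_def_cfa below therefore encodes FRAME_SIZE as
   (FRAME_SIZE & 0x7f) | 0x80 followed by FRAME_SIZE >> 7, which only works
   while FRAME_SIZE fits in 14 bits. */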
2123
2124 static const DebugFrame debug_frame = {
2125 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2126 .h.cie.id = -1,
2127 .h.cie.version = 1,
2128 .h.cie.code_align = 1,
2129 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2130 .h.cie.return_column = 14,
2131
2132 /* Total FDE size does not include the "len" member. */
2133 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2134
2135 .fde_def_cfa = {
2136 12, 13, /* DW_CFA_def_cfa sp, ... */
2137 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2138 (FRAME_SIZE >> 7)
2139 },
2140 .fde_reg_ofs = {
2141 /* The following must match the stmdb in the prologue. */
2142 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2143 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2144 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2145 0x89, 4, /* DW_CFA_offset, r9, -16 */
2146 0x88, 5, /* DW_CFA_offset, r8, -20 */
2147 0x87, 6, /* DW_CFA_offset, r7, -24 */
2148 0x86, 7, /* DW_CFA_offset, r6, -28 */
2149 0x85, 8, /* DW_CFA_offset, r5, -32 */
2150 0x84, 9, /* DW_CFA_offset, r4, -36 */
2151 }
2152 };
2153
2154 void tcg_register_jit(void *buf, size_t buf_size)
2155 {
2156 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2157 }