2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
31 /* Define to dump the ELF file used to communicate with GDB. */
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
39 #include "qemu-common.h"
40 #include "qemu/cache-utils.h"
41 #include "qemu/host-utils.h"
42 #include "qemu/timer.h"
44 /* Note: the long term plan is to reduce the dependencies on the QEMU
45 CPU definitions. Currently they are used for qemu_ld/st
47 #define NO_CPU_IO_DEFS
52 #if TCG_TARGET_REG_BITS == 64
53 # define ELF_CLASS ELFCLASS64
55 # define ELF_CLASS ELFCLASS32
57 #ifdef HOST_WORDS_BIGENDIAN
58 # define ELF_DATA ELFDATA2MSB
60 # define ELF_DATA ELFDATA2LSB
65 /* Forward declarations for functions declared in tcg-target.c and used here. */
66 static void tcg_target_init(TCGContext
*s
);
67 static void tcg_target_qemu_prologue(TCGContext
*s
);
68 static void patch_reloc(uint8_t *code_ptr
, int type
,
69 tcg_target_long value
, tcg_target_long addend
);
71 /* The CIE and FDE header definitions will be common to all hosts. */
73 uint32_t len
__attribute__((aligned((sizeof(void *)))));
79 uint8_t return_column
;
82 typedef struct QEMU_PACKED
{
83 uint32_t len
__attribute__((aligned((sizeof(void *)))));
85 tcg_target_long func_start
;
86 tcg_target_long func_len
;
87 } DebugFrameFDEHeader
;
89 static void tcg_register_jit_int(void *buf
, size_t size
,
90 void *debug_frame
, size_t debug_frame_size
)
91 __attribute__((unused
));
93 /* Forward declarations for functions declared and used in tcg-target.c. */
94 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
);
95 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
96 tcg_target_long arg2
);
97 static void tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
98 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
99 TCGReg ret
, tcg_target_long arg
);
100 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
, const TCGArg
*args
,
101 const int *const_args
);
102 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
103 tcg_target_long arg2
);
104 static int tcg_target_const_match(tcg_target_long val
,
105 const TCGArgConstraint
*arg_ct
);
107 TCGOpDef tcg_op_defs
[] = {
108 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
112 const size_t tcg_op_defs_max
= ARRAY_SIZE(tcg_op_defs
);
114 static TCGRegSet tcg_target_available_regs
[2];
115 static TCGRegSet tcg_target_call_clobber_regs
;
117 static inline void tcg_out8(TCGContext
*s
, uint8_t v
)
122 static inline void tcg_out16(TCGContext
*s
, uint16_t v
)
124 uint8_t *p
= s
->code_ptr
;
129 static inline void tcg_out32(TCGContext
*s
, uint32_t v
)
131 uint8_t *p
= s
->code_ptr
;
136 static inline void tcg_out64(TCGContext
*s
, uint64_t v
)
138 uint8_t *p
= s
->code_ptr
;
143 /* label relocation processing */
145 static void tcg_out_reloc(TCGContext
*s
, uint8_t *code_ptr
, int type
,
146 int label_index
, long addend
)
151 l
= &s
->labels
[label_index
];
153 /* FIXME: This may break relocations on RISC targets that
154 modify instruction fields in place. The caller may not have
155 written the initial value. */
156 patch_reloc(code_ptr
, type
, l
->u
.value
, addend
);
158 /* add a new relocation entry */
159 r
= tcg_malloc(sizeof(TCGRelocation
));
163 r
->next
= l
->u
.first_reloc
;
164 l
->u
.first_reloc
= r
;
168 static void tcg_out_label(TCGContext
*s
, int label_index
, void *ptr
)
172 tcg_target_long value
= (tcg_target_long
)ptr
;
174 l
= &s
->labels
[label_index
];
177 r
= l
->u
.first_reloc
;
179 patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
);
186 int gen_new_label(void)
188 TCGContext
*s
= &tcg_ctx
;
192 if (s
->nb_labels
>= TCG_MAX_LABELS
)
194 idx
= s
->nb_labels
++;
197 l
->u
.first_reloc
= NULL
;
201 #include "tcg-target.c"
203 /* pool based memory allocation */
204 void *tcg_malloc_internal(TCGContext
*s
, int size
)
209 if (size
> TCG_POOL_CHUNK_SIZE
) {
210 /* big malloc: insert a new pool (XXX: could optimize) */
211 p
= g_malloc(sizeof(TCGPool
) + size
);
213 p
->next
= s
->pool_first_large
;
214 s
->pool_first_large
= p
;
225 pool_size
= TCG_POOL_CHUNK_SIZE
;
226 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
230 s
->pool_current
->next
= p
;
239 s
->pool_cur
= p
->data
+ size
;
240 s
->pool_end
= p
->data
+ p
->size
;
244 void tcg_pool_reset(TCGContext
*s
)
247 for (p
= s
->pool_first_large
; p
; p
= t
) {
251 s
->pool_first_large
= NULL
;
252 s
->pool_cur
= s
->pool_end
= NULL
;
253 s
->pool_current
= NULL
;
256 void tcg_context_init(TCGContext
*s
)
258 int op
, total_args
, n
;
260 TCGArgConstraint
*args_ct
;
263 memset(s
, 0, sizeof(*s
));
266 /* Count total number of arguments and allocate the corresponding
269 for(op
= 0; op
< NB_OPS
; op
++) {
270 def
= &tcg_op_defs
[op
];
271 n
= def
->nb_iargs
+ def
->nb_oargs
;
275 args_ct
= g_malloc(sizeof(TCGArgConstraint
) * total_args
);
276 sorted_args
= g_malloc(sizeof(int) * total_args
);
278 for(op
= 0; op
< NB_OPS
; op
++) {
279 def
= &tcg_op_defs
[op
];
280 def
->args_ct
= args_ct
;
281 def
->sorted_args
= sorted_args
;
282 n
= def
->nb_iargs
+ def
->nb_oargs
;
290 void tcg_prologue_init(TCGContext
*s
)
292 /* init global prologue and epilogue */
293 s
->code_buf
= s
->code_gen_prologue
;
294 s
->code_ptr
= s
->code_buf
;
295 tcg_target_qemu_prologue(s
);
296 flush_icache_range((uintptr_t)s
->code_buf
, (uintptr_t)s
->code_ptr
);
299 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
300 size_t size
= s
->code_ptr
- s
->code_buf
;
301 qemu_log("PROLOGUE: [size=%zu]\n", size
);
302 log_disas(s
->code_buf
, size
);
309 void tcg_set_frame(TCGContext
*s
, int reg
,
310 tcg_target_long start
, tcg_target_long size
)
312 s
->frame_start
= start
;
313 s
->frame_end
= start
+ size
;
317 void tcg_func_start(TCGContext
*s
)
321 s
->nb_temps
= s
->nb_globals
;
322 for(i
= 0; i
< (TCG_TYPE_COUNT
* 2); i
++)
323 s
->first_free_temp
[i
] = -1;
324 s
->labels
= tcg_malloc(sizeof(TCGLabel
) * TCG_MAX_LABELS
);
326 s
->current_frame_offset
= s
->frame_start
;
328 #ifdef CONFIG_DEBUG_TCG
329 s
->goto_tb_issue_mask
= 0;
332 s
->gen_opc_ptr
= s
->gen_opc_buf
;
333 s
->gen_opparam_ptr
= s
->gen_opparam_buf
;
335 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
336 /* Initialize qemu_ld/st labels to assist code generation at the end of TB
337 for TLB miss cases at the end of TB */
338 s
->qemu_ldst_labels
= tcg_malloc(sizeof(TCGLabelQemuLdst
) *
340 s
->nb_qemu_ldst_labels
= 0;
344 static inline void tcg_temp_alloc(TCGContext
*s
, int n
)
346 if (n
> TCG_MAX_TEMPS
)
350 static inline int tcg_global_reg_new_internal(TCGType type
, int reg
,
353 TCGContext
*s
= &tcg_ctx
;
357 #if TCG_TARGET_REG_BITS == 32
358 if (type
!= TCG_TYPE_I32
)
361 if (tcg_regset_test_reg(s
->reserved_regs
, reg
))
364 tcg_temp_alloc(s
, s
->nb_globals
+ 1);
365 ts
= &s
->temps
[s
->nb_globals
];
366 ts
->base_type
= type
;
372 tcg_regset_set_reg(s
->reserved_regs
, reg
);
376 TCGv_i32
tcg_global_reg_new_i32(int reg
, const char *name
)
380 idx
= tcg_global_reg_new_internal(TCG_TYPE_I32
, reg
, name
);
381 return MAKE_TCGV_I32(idx
);
384 TCGv_i64
tcg_global_reg_new_i64(int reg
, const char *name
)
388 idx
= tcg_global_reg_new_internal(TCG_TYPE_I64
, reg
, name
);
389 return MAKE_TCGV_I64(idx
);
392 static inline int tcg_global_mem_new_internal(TCGType type
, int reg
,
393 tcg_target_long offset
,
396 TCGContext
*s
= &tcg_ctx
;
401 #if TCG_TARGET_REG_BITS == 32
402 if (type
== TCG_TYPE_I64
) {
404 tcg_temp_alloc(s
, s
->nb_globals
+ 2);
405 ts
= &s
->temps
[s
->nb_globals
];
406 ts
->base_type
= type
;
407 ts
->type
= TCG_TYPE_I32
;
409 ts
->mem_allocated
= 1;
411 #ifdef TCG_TARGET_WORDS_BIGENDIAN
412 ts
->mem_offset
= offset
+ 4;
414 ts
->mem_offset
= offset
;
416 pstrcpy(buf
, sizeof(buf
), name
);
417 pstrcat(buf
, sizeof(buf
), "_0");
418 ts
->name
= strdup(buf
);
421 ts
->base_type
= type
;
422 ts
->type
= TCG_TYPE_I32
;
424 ts
->mem_allocated
= 1;
426 #ifdef TCG_TARGET_WORDS_BIGENDIAN
427 ts
->mem_offset
= offset
;
429 ts
->mem_offset
= offset
+ 4;
431 pstrcpy(buf
, sizeof(buf
), name
);
432 pstrcat(buf
, sizeof(buf
), "_1");
433 ts
->name
= strdup(buf
);
439 tcg_temp_alloc(s
, s
->nb_globals
+ 1);
440 ts
= &s
->temps
[s
->nb_globals
];
441 ts
->base_type
= type
;
444 ts
->mem_allocated
= 1;
446 ts
->mem_offset
= offset
;
453 TCGv_i32
tcg_global_mem_new_i32(int reg
, tcg_target_long offset
,
458 idx
= tcg_global_mem_new_internal(TCG_TYPE_I32
, reg
, offset
, name
);
459 return MAKE_TCGV_I32(idx
);
462 TCGv_i64
tcg_global_mem_new_i64(int reg
, tcg_target_long offset
,
467 idx
= tcg_global_mem_new_internal(TCG_TYPE_I64
, reg
, offset
, name
);
468 return MAKE_TCGV_I64(idx
);
471 static inline int tcg_temp_new_internal(TCGType type
, int temp_local
)
473 TCGContext
*s
= &tcg_ctx
;
480 idx
= s
->first_free_temp
[k
];
482 /* There is already an available temp with the
485 s
->first_free_temp
[k
] = ts
->next_free_temp
;
486 ts
->temp_allocated
= 1;
487 assert(ts
->temp_local
== temp_local
);
490 #if TCG_TARGET_REG_BITS == 32
491 if (type
== TCG_TYPE_I64
) {
492 tcg_temp_alloc(s
, s
->nb_temps
+ 2);
493 ts
= &s
->temps
[s
->nb_temps
];
494 ts
->base_type
= type
;
495 ts
->type
= TCG_TYPE_I32
;
496 ts
->temp_allocated
= 1;
497 ts
->temp_local
= temp_local
;
500 ts
->base_type
= TCG_TYPE_I32
;
501 ts
->type
= TCG_TYPE_I32
;
502 ts
->temp_allocated
= 1;
503 ts
->temp_local
= temp_local
;
509 tcg_temp_alloc(s
, s
->nb_temps
+ 1);
510 ts
= &s
->temps
[s
->nb_temps
];
511 ts
->base_type
= type
;
513 ts
->temp_allocated
= 1;
514 ts
->temp_local
= temp_local
;
520 #if defined(CONFIG_DEBUG_TCG)
526 TCGv_i32
tcg_temp_new_internal_i32(int temp_local
)
530 idx
= tcg_temp_new_internal(TCG_TYPE_I32
, temp_local
);
531 return MAKE_TCGV_I32(idx
);
534 TCGv_i64
tcg_temp_new_internal_i64(int temp_local
)
538 idx
= tcg_temp_new_internal(TCG_TYPE_I64
, temp_local
);
539 return MAKE_TCGV_I64(idx
);
542 static inline void tcg_temp_free_internal(int idx
)
544 TCGContext
*s
= &tcg_ctx
;
548 #if defined(CONFIG_DEBUG_TCG)
550 if (s
->temps_in_use
< 0) {
551 fprintf(stderr
, "More temporaries freed than allocated!\n");
555 assert(idx
>= s
->nb_globals
&& idx
< s
->nb_temps
);
557 assert(ts
->temp_allocated
!= 0);
558 ts
->temp_allocated
= 0;
562 ts
->next_free_temp
= s
->first_free_temp
[k
];
563 s
->first_free_temp
[k
] = idx
;
566 void tcg_temp_free_i32(TCGv_i32 arg
)
568 tcg_temp_free_internal(GET_TCGV_I32(arg
));
571 void tcg_temp_free_i64(TCGv_i64 arg
)
573 tcg_temp_free_internal(GET_TCGV_I64(arg
));
576 TCGv_i32
tcg_const_i32(int32_t val
)
579 t0
= tcg_temp_new_i32();
580 tcg_gen_movi_i32(t0
, val
);
584 TCGv_i64
tcg_const_i64(int64_t val
)
587 t0
= tcg_temp_new_i64();
588 tcg_gen_movi_i64(t0
, val
);
592 TCGv_i32
tcg_const_local_i32(int32_t val
)
595 t0
= tcg_temp_local_new_i32();
596 tcg_gen_movi_i32(t0
, val
);
600 TCGv_i64
tcg_const_local_i64(int64_t val
)
603 t0
= tcg_temp_local_new_i64();
604 tcg_gen_movi_i64(t0
, val
);
608 #if defined(CONFIG_DEBUG_TCG)
609 void tcg_clear_temp_count(void)
611 TCGContext
*s
= &tcg_ctx
;
615 int tcg_check_temp_count(void)
617 TCGContext
*s
= &tcg_ctx
;
618 if (s
->temps_in_use
) {
619 /* Clear the count so that we don't give another
620 * warning immediately next time around.
629 void tcg_register_helper(void *func
, const char *name
)
631 TCGContext
*s
= &tcg_ctx
;
633 if ((s
->nb_helpers
+ 1) > s
->allocated_helpers
) {
634 n
= s
->allocated_helpers
;
640 s
->helpers
= realloc(s
->helpers
, n
* sizeof(TCGHelperInfo
));
641 s
->allocated_helpers
= n
;
643 s
->helpers
[s
->nb_helpers
].func
= (tcg_target_ulong
)func
;
644 s
->helpers
[s
->nb_helpers
].name
= name
;
648 /* Note: we convert the 64 bit args to 32 bit and do some alignment
649 and endian swap. Maybe it would be better to do the alignment
650 and endian swap in tcg_reg_alloc_call(). */
651 void tcg_gen_callN(TCGContext
*s
, TCGv_ptr func
, unsigned int flags
,
652 int sizemask
, TCGArg ret
, int nargs
, TCGArg
*args
)
659 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
660 for (i
= 0; i
< nargs
; ++i
) {
661 int is_64bit
= sizemask
& (1 << (i
+1)*2);
662 int is_signed
= sizemask
& (2 << (i
+1)*2);
664 TCGv_i64 temp
= tcg_temp_new_i64();
665 TCGv_i64 orig
= MAKE_TCGV_I64(args
[i
]);
667 tcg_gen_ext32s_i64(temp
, orig
);
669 tcg_gen_ext32u_i64(temp
, orig
);
671 args
[i
] = GET_TCGV_I64(temp
);
674 #endif /* TCG_TARGET_EXTEND_ARGS */
676 *s
->gen_opc_ptr
++ = INDEX_op_call
;
677 nparam
= s
->gen_opparam_ptr
++;
678 if (ret
!= TCG_CALL_DUMMY_ARG
) {
679 #if TCG_TARGET_REG_BITS < 64
681 #ifdef TCG_TARGET_WORDS_BIGENDIAN
682 *s
->gen_opparam_ptr
++ = ret
+ 1;
683 *s
->gen_opparam_ptr
++ = ret
;
685 *s
->gen_opparam_ptr
++ = ret
;
686 *s
->gen_opparam_ptr
++ = ret
+ 1;
692 *s
->gen_opparam_ptr
++ = ret
;
699 for (i
= 0; i
< nargs
; i
++) {
700 #if TCG_TARGET_REG_BITS < 64
701 int is_64bit
= sizemask
& (1 << (i
+1)*2);
703 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
704 /* some targets want aligned 64 bit args */
706 *s
->gen_opparam_ptr
++ = TCG_CALL_DUMMY_ARG
;
710 /* If stack grows up, then we will be placing successive
711 arguments at lower addresses, which means we need to
712 reverse the order compared to how we would normally
713 treat either big or little-endian. For those arguments
714 that will wind up in registers, this still works for
715 HPPA (the only current STACK_GROWSUP target) since the
716 argument registers are *also* allocated in decreasing
717 order. If another such target is added, this logic may
718 have to get more complicated to differentiate between
719 stack arguments and register arguments. */
720 #if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
721 *s
->gen_opparam_ptr
++ = args
[i
] + 1;
722 *s
->gen_opparam_ptr
++ = args
[i
];
724 *s
->gen_opparam_ptr
++ = args
[i
];
725 *s
->gen_opparam_ptr
++ = args
[i
] + 1;
730 #endif /* TCG_TARGET_REG_BITS < 64 */
732 *s
->gen_opparam_ptr
++ = args
[i
];
735 *s
->gen_opparam_ptr
++ = GET_TCGV_PTR(func
);
737 *s
->gen_opparam_ptr
++ = flags
;
739 *nparam
= (nb_rets
<< 16) | (real_args
+ 1);
741 /* total parameters, needed to go backward in the instruction stream */
742 *s
->gen_opparam_ptr
++ = 1 + nb_rets
+ real_args
+ 3;
744 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
745 for (i
= 0; i
< nargs
; ++i
) {
746 int is_64bit
= sizemask
& (1 << (i
+1)*2);
748 TCGv_i64 temp
= MAKE_TCGV_I64(args
[i
]);
749 tcg_temp_free_i64(temp
);
752 #endif /* TCG_TARGET_EXTEND_ARGS */
755 #if TCG_TARGET_REG_BITS == 32
756 void tcg_gen_shifti_i64(TCGv_i64 ret
, TCGv_i64 arg1
,
757 int c
, int right
, int arith
)
760 tcg_gen_mov_i32(TCGV_LOW(ret
), TCGV_LOW(arg1
));
761 tcg_gen_mov_i32(TCGV_HIGH(ret
), TCGV_HIGH(arg1
));
762 } else if (c
>= 32) {
766 tcg_gen_sari_i32(TCGV_LOW(ret
), TCGV_HIGH(arg1
), c
);
767 tcg_gen_sari_i32(TCGV_HIGH(ret
), TCGV_HIGH(arg1
), 31);
769 tcg_gen_shri_i32(TCGV_LOW(ret
), TCGV_HIGH(arg1
), c
);
770 tcg_gen_movi_i32(TCGV_HIGH(ret
), 0);
773 tcg_gen_shli_i32(TCGV_HIGH(ret
), TCGV_LOW(arg1
), c
);
774 tcg_gen_movi_i32(TCGV_LOW(ret
), 0);
779 t0
= tcg_temp_new_i32();
780 t1
= tcg_temp_new_i32();
782 tcg_gen_shli_i32(t0
, TCGV_HIGH(arg1
), 32 - c
);
784 tcg_gen_sari_i32(t1
, TCGV_HIGH(arg1
), c
);
786 tcg_gen_shri_i32(t1
, TCGV_HIGH(arg1
), c
);
787 tcg_gen_shri_i32(TCGV_LOW(ret
), TCGV_LOW(arg1
), c
);
788 tcg_gen_or_i32(TCGV_LOW(ret
), TCGV_LOW(ret
), t0
);
789 tcg_gen_mov_i32(TCGV_HIGH(ret
), t1
);
791 tcg_gen_shri_i32(t0
, TCGV_LOW(arg1
), 32 - c
);
792 /* Note: ret can be the same as arg1, so we use t1 */
793 tcg_gen_shli_i32(t1
, TCGV_LOW(arg1
), c
);
794 tcg_gen_shli_i32(TCGV_HIGH(ret
), TCGV_HIGH(arg1
), c
);
795 tcg_gen_or_i32(TCGV_HIGH(ret
), TCGV_HIGH(ret
), t0
);
796 tcg_gen_mov_i32(TCGV_LOW(ret
), t1
);
798 tcg_temp_free_i32(t0
);
799 tcg_temp_free_i32(t1
);
805 static void tcg_reg_alloc_start(TCGContext
*s
)
809 for(i
= 0; i
< s
->nb_globals
; i
++) {
812 ts
->val_type
= TEMP_VAL_REG
;
814 ts
->val_type
= TEMP_VAL_MEM
;
817 for(i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
819 if (ts
->temp_local
) {
820 ts
->val_type
= TEMP_VAL_MEM
;
822 ts
->val_type
= TEMP_VAL_DEAD
;
824 ts
->mem_allocated
= 0;
827 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
828 s
->reg_to_temp
[i
] = -1;
832 static char *tcg_get_arg_str_idx(TCGContext
*s
, char *buf
, int buf_size
,
837 assert(idx
>= 0 && idx
< s
->nb_temps
);
839 if (idx
< s
->nb_globals
) {
840 pstrcpy(buf
, buf_size
, ts
->name
);
843 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
845 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
850 char *tcg_get_arg_str_i32(TCGContext
*s
, char *buf
, int buf_size
, TCGv_i32 arg
)
852 return tcg_get_arg_str_idx(s
, buf
, buf_size
, GET_TCGV_I32(arg
));
855 char *tcg_get_arg_str_i64(TCGContext
*s
, char *buf
, int buf_size
, TCGv_i64 arg
)
857 return tcg_get_arg_str_idx(s
, buf
, buf_size
, GET_TCGV_I64(arg
));
860 static int helper_cmp(const void *p1
, const void *p2
)
862 const TCGHelperInfo
*th1
= p1
;
863 const TCGHelperInfo
*th2
= p2
;
864 if (th1
->func
< th2
->func
)
866 else if (th1
->func
== th2
->func
)
872 /* find helper definition (Note: A hash table would be better) */
873 static TCGHelperInfo
*tcg_find_helper(TCGContext
*s
, tcg_target_ulong val
)
879 if (unlikely(!s
->helpers_sorted
)) {
880 qsort(s
->helpers
, s
->nb_helpers
, sizeof(TCGHelperInfo
),
882 s
->helpers_sorted
= 1;
887 m_max
= s
->nb_helpers
- 1;
888 while (m_min
<= m_max
) {
889 m
= (m_min
+ m_max
) >> 1;
903 static const char * const cond_name
[] =
905 [TCG_COND_NEVER
] = "never",
906 [TCG_COND_ALWAYS
] = "always",
907 [TCG_COND_EQ
] = "eq",
908 [TCG_COND_NE
] = "ne",
909 [TCG_COND_LT
] = "lt",
910 [TCG_COND_GE
] = "ge",
911 [TCG_COND_LE
] = "le",
912 [TCG_COND_GT
] = "gt",
913 [TCG_COND_LTU
] = "ltu",
914 [TCG_COND_GEU
] = "geu",
915 [TCG_COND_LEU
] = "leu",
916 [TCG_COND_GTU
] = "gtu"
919 void tcg_dump_ops(TCGContext
*s
)
921 const uint16_t *opc_ptr
;
925 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
, first_insn
;
930 opc_ptr
= s
->gen_opc_buf
;
931 args
= s
->gen_opparam_buf
;
932 while (opc_ptr
< s
->gen_opc_ptr
) {
934 def
= &tcg_op_defs
[c
];
935 if (c
== INDEX_op_debug_insn_start
) {
937 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
938 pc
= ((uint64_t)args
[1] << 32) | args
[0];
945 qemu_log(" ---- 0x%" PRIx64
, pc
);
947 nb_oargs
= def
->nb_oargs
;
948 nb_iargs
= def
->nb_iargs
;
949 nb_cargs
= def
->nb_cargs
;
950 } else if (c
== INDEX_op_call
) {
953 /* variable number of arguments */
955 nb_oargs
= arg
>> 16;
956 nb_iargs
= arg
& 0xffff;
957 nb_cargs
= def
->nb_cargs
;
959 qemu_log(" %s ", def
->name
);
963 tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
964 args
[nb_oargs
+ nb_iargs
- 1]));
966 qemu_log(",$0x%" TCG_PRIlx
, args
[nb_oargs
+ nb_iargs
]);
968 qemu_log(",$%d", nb_oargs
);
969 for(i
= 0; i
< nb_oargs
; i
++) {
971 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
974 for(i
= 0; i
< (nb_iargs
- 1); i
++) {
976 if (args
[nb_oargs
+ i
] == TCG_CALL_DUMMY_ARG
) {
979 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
980 args
[nb_oargs
+ i
]));
983 } else if (c
== INDEX_op_movi_i32
|| c
== INDEX_op_movi_i64
) {
984 tcg_target_ulong val
;
987 nb_oargs
= def
->nb_oargs
;
988 nb_iargs
= def
->nb_iargs
;
989 nb_cargs
= def
->nb_cargs
;
990 qemu_log(" %s %s,$", def
->name
,
991 tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), args
[0]));
993 th
= tcg_find_helper(s
, val
);
995 qemu_log("%s", th
->name
);
997 if (c
== INDEX_op_movi_i32
) {
998 qemu_log("0x%x", (uint32_t)val
);
1000 qemu_log("0x%" PRIx64
, (uint64_t)val
);
1004 qemu_log(" %s ", def
->name
);
1005 if (c
== INDEX_op_nopn
) {
1006 /* variable number of arguments */
1011 nb_oargs
= def
->nb_oargs
;
1012 nb_iargs
= def
->nb_iargs
;
1013 nb_cargs
= def
->nb_cargs
;
1017 for(i
= 0; i
< nb_oargs
; i
++) {
1021 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
1024 for(i
= 0; i
< nb_iargs
; i
++) {
1028 qemu_log("%s", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
),
1032 case INDEX_op_brcond_i32
:
1033 case INDEX_op_setcond_i32
:
1034 case INDEX_op_movcond_i32
:
1035 case INDEX_op_brcond2_i32
:
1036 case INDEX_op_setcond2_i32
:
1037 case INDEX_op_brcond_i64
:
1038 case INDEX_op_setcond_i64
:
1039 case INDEX_op_movcond_i64
:
1040 if (args
[k
] < ARRAY_SIZE(cond_name
) && cond_name
[args
[k
]]) {
1041 qemu_log(",%s", cond_name
[args
[k
++]]);
1043 qemu_log(",$0x%" TCG_PRIlx
, args
[k
++]);
1051 for(; i
< nb_cargs
; i
++) {
1056 qemu_log("$0x%" TCG_PRIlx
, arg
);
1060 args
+= nb_iargs
+ nb_oargs
+ nb_cargs
;
1064 /* we give more priority to constraints with less registers */
1065 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
1067 const TCGArgConstraint
*arg_ct
;
1070 arg_ct
= &def
->args_ct
[k
];
1071 if (arg_ct
->ct
& TCG_CT_ALIAS
) {
1072 /* an alias is equivalent to a single register */
1075 if (!(arg_ct
->ct
& TCG_CT_REG
))
1078 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
1079 if (tcg_regset_test_reg(arg_ct
->u
.regs
, i
))
1083 return TCG_TARGET_NB_REGS
- n
+ 1;
1086 /* sort from highest priority to lowest */
1087 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
1089 int i
, j
, p1
, p2
, tmp
;
1091 for(i
= 0; i
< n
; i
++)
1092 def
->sorted_args
[start
+ i
] = start
+ i
;
1095 for(i
= 0; i
< n
- 1; i
++) {
1096 for(j
= i
+ 1; j
< n
; j
++) {
1097 p1
= get_constraint_priority(def
, def
->sorted_args
[start
+ i
]);
1098 p2
= get_constraint_priority(def
, def
->sorted_args
[start
+ j
]);
1100 tmp
= def
->sorted_args
[start
+ i
];
1101 def
->sorted_args
[start
+ i
] = def
->sorted_args
[start
+ j
];
1102 def
->sorted_args
[start
+ j
] = tmp
;
1108 void tcg_add_target_add_op_defs(const TCGTargetOpDef
*tdefs
)
1116 if (tdefs
->op
== (TCGOpcode
)-1)
1119 assert((unsigned)op
< NB_OPS
);
1120 def
= &tcg_op_defs
[op
];
1121 #if defined(CONFIG_DEBUG_TCG)
1122 /* Duplicate entry in op definitions? */
1126 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
1127 for(i
= 0; i
< nb_args
; i
++) {
1128 ct_str
= tdefs
->args_ct_str
[i
];
1129 /* Incomplete TCGTargetOpDef entry? */
1130 assert(ct_str
!= NULL
);
1131 tcg_regset_clear(def
->args_ct
[i
].u
.regs
);
1132 def
->args_ct
[i
].ct
= 0;
1133 if (ct_str
[0] >= '0' && ct_str
[0] <= '9') {
1135 oarg
= ct_str
[0] - '0';
1136 assert(oarg
< def
->nb_oargs
);
1137 assert(def
->args_ct
[oarg
].ct
& TCG_CT_REG
);
1138 /* TCG_CT_ALIAS is for the output arguments. The input
1139 argument is tagged with TCG_CT_IALIAS. */
1140 def
->args_ct
[i
] = def
->args_ct
[oarg
];
1141 def
->args_ct
[oarg
].ct
= TCG_CT_ALIAS
;
1142 def
->args_ct
[oarg
].alias_index
= i
;
1143 def
->args_ct
[i
].ct
|= TCG_CT_IALIAS
;
1144 def
->args_ct
[i
].alias_index
= oarg
;
1147 if (*ct_str
== '\0')
1151 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
1155 if (target_parse_constraint(&def
->args_ct
[i
], &ct_str
) < 0) {
1156 fprintf(stderr
, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1157 ct_str
, i
, def
->name
);
1165 /* TCGTargetOpDef entry with too much information? */
1166 assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
1168 /* sort the constraints (XXX: this is just an heuristic) */
1169 sort_constraints(def
, 0, def
->nb_oargs
);
1170 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
1176 printf("%s: sorted=", def
->name
);
1177 for(i
= 0; i
< def
->nb_oargs
+ def
->nb_iargs
; i
++)
1178 printf(" %d", def
->sorted_args
[i
]);
1185 #if defined(CONFIG_DEBUG_TCG)
1187 for (op
= 0; op
< ARRAY_SIZE(tcg_op_defs
); op
++) {
1188 const TCGOpDef
*def
= &tcg_op_defs
[op
];
1189 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
1190 /* Wrong entry in op definitions? */
1192 fprintf(stderr
, "Invalid op definition for %s\n", def
->name
);
1196 /* Missing entry in op definitions? */
1198 fprintf(stderr
, "Missing op definition for %s\n", def
->name
);
1209 #ifdef USE_LIVENESS_ANALYSIS
1211 /* set a nop for an operation using 'nb_args' */
1212 static inline void tcg_set_nop(TCGContext
*s
, uint16_t *opc_ptr
,
1213 TCGArg
*args
, int nb_args
)
1216 *opc_ptr
= INDEX_op_nop
;
1218 *opc_ptr
= INDEX_op_nopn
;
1220 args
[nb_args
- 1] = nb_args
;
1224 /* liveness analysis: end of function: all temps are dead, and globals
1225 should be in memory. */
1226 static inline void tcg_la_func_end(TCGContext
*s
, uint8_t *dead_temps
,
1229 memset(dead_temps
, 1, s
->nb_temps
);
1230 memset(mem_temps
, 1, s
->nb_globals
);
1231 memset(mem_temps
+ s
->nb_globals
, 0, s
->nb_temps
- s
->nb_globals
);
1234 /* liveness analysis: end of basic block: all temps are dead, globals
1235 and local temps should be in memory. */
1236 static inline void tcg_la_bb_end(TCGContext
*s
, uint8_t *dead_temps
,
1241 memset(dead_temps
, 1, s
->nb_temps
);
1242 memset(mem_temps
, 1, s
->nb_globals
);
1243 for(i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
1244 mem_temps
[i
] = s
->temps
[i
].temp_local
;
1248 /* Liveness analysis : update the opc_dead_args array to tell if a
1249 given input arguments is dead. Instructions updating dead
1250 temporaries are removed. */
1251 static void tcg_liveness_analysis(TCGContext
*s
)
1253 int i
, op_index
, nb_args
, nb_iargs
, nb_oargs
, arg
, nb_ops
;
1254 TCGOpcode op
, op_new
, op_new2
;
1256 const TCGOpDef
*def
;
1257 uint8_t *dead_temps
, *mem_temps
;
1262 s
->gen_opc_ptr
++; /* skip end */
1264 nb_ops
= s
->gen_opc_ptr
- s
->gen_opc_buf
;
1266 s
->op_dead_args
= tcg_malloc(nb_ops
* sizeof(uint16_t));
1267 s
->op_sync_args
= tcg_malloc(nb_ops
* sizeof(uint8_t));
1269 dead_temps
= tcg_malloc(s
->nb_temps
);
1270 mem_temps
= tcg_malloc(s
->nb_temps
);
1271 tcg_la_func_end(s
, dead_temps
, mem_temps
);
1273 args
= s
->gen_opparam_ptr
;
1274 op_index
= nb_ops
- 1;
1275 while (op_index
>= 0) {
1276 op
= s
->gen_opc_buf
[op_index
];
1277 def
= &tcg_op_defs
[op
];
1285 nb_iargs
= args
[0] & 0xffff;
1286 nb_oargs
= args
[0] >> 16;
1288 call_flags
= args
[nb_oargs
+ nb_iargs
];
1290 /* pure functions can be removed if their result is not
1292 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
1293 for(i
= 0; i
< nb_oargs
; i
++) {
1295 if (!dead_temps
[arg
] || mem_temps
[arg
]) {
1296 goto do_not_remove_call
;
1299 tcg_set_nop(s
, s
->gen_opc_buf
+ op_index
,
1304 /* output args are dead */
1307 for(i
= 0; i
< nb_oargs
; i
++) {
1309 if (dead_temps
[arg
]) {
1310 dead_args
|= (1 << i
);
1312 if (mem_temps
[arg
]) {
1313 sync_args
|= (1 << i
);
1315 dead_temps
[arg
] = 1;
1319 if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
1320 /* globals should be synced to memory */
1321 memset(mem_temps
, 1, s
->nb_globals
);
1323 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
1324 TCG_CALL_NO_READ_GLOBALS
))) {
1325 /* globals should go back to memory */
1326 memset(dead_temps
, 1, s
->nb_globals
);
1329 /* input args are live */
1330 for(i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
1332 if (arg
!= TCG_CALL_DUMMY_ARG
) {
1333 if (dead_temps
[arg
]) {
1334 dead_args
|= (1 << i
);
1336 dead_temps
[arg
] = 0;
1339 s
->op_dead_args
[op_index
] = dead_args
;
1340 s
->op_sync_args
[op_index
] = sync_args
;
1345 case INDEX_op_debug_insn_start
:
1346 args
-= def
->nb_args
;
1352 case INDEX_op_discard
:
1354 /* mark the temporary as dead */
1355 dead_temps
[args
[0]] = 1;
1356 mem_temps
[args
[0]] = 0;
1361 case INDEX_op_add2_i32
:
1362 op_new
= INDEX_op_add_i32
;
1364 case INDEX_op_sub2_i32
:
1365 op_new
= INDEX_op_sub_i32
;
1367 case INDEX_op_add2_i64
:
1368 op_new
= INDEX_op_add_i64
;
1370 case INDEX_op_sub2_i64
:
1371 op_new
= INDEX_op_sub_i64
;
1376 /* Test if the high part of the operation is dead, but not
1377 the low part. The result can be optimized to a simple
1378 add or sub. This happens often for x86_64 guest when the
1379 cpu mode is set to 32 bit. */
1380 if (dead_temps
[args
[1]] && !mem_temps
[args
[1]]) {
1381 if (dead_temps
[args
[0]] && !mem_temps
[args
[0]]) {
1384 /* Create the single operation plus nop. */
1385 s
->gen_opc_buf
[op_index
] = op
= op_new
;
1388 assert(s
->gen_opc_buf
[op_index
+ 1] == INDEX_op_nop
);
1389 tcg_set_nop(s
, s
->gen_opc_buf
+ op_index
+ 1, args
+ 3, 3);
1390 /* Fall through and mark the single-word operation live. */
1396 case INDEX_op_mulu2_i32
:
1397 op_new
= INDEX_op_mul_i32
;
1398 op_new2
= INDEX_op_muluh_i32
;
1399 have_op_new2
= TCG_TARGET_HAS_muluh_i32
;
1401 case INDEX_op_muls2_i32
:
1402 op_new
= INDEX_op_mul_i32
;
1403 op_new2
= INDEX_op_mulsh_i32
;
1404 have_op_new2
= TCG_TARGET_HAS_mulsh_i32
;
1406 case INDEX_op_mulu2_i64
:
1407 op_new
= INDEX_op_mul_i64
;
1408 op_new2
= INDEX_op_muluh_i64
;
1409 have_op_new2
= TCG_TARGET_HAS_muluh_i64
;
1411 case INDEX_op_muls2_i64
:
1412 op_new
= INDEX_op_mul_i64
;
1413 op_new2
= INDEX_op_mulsh_i64
;
1414 have_op_new2
= TCG_TARGET_HAS_mulsh_i64
;
1420 if (dead_temps
[args
[1]] && !mem_temps
[args
[1]]) {
1421 if (dead_temps
[args
[0]] && !mem_temps
[args
[0]]) {
1422 /* Both parts of the operation are dead. */
1425 /* The high part of the operation is dead; generate the low. */
1426 s
->gen_opc_buf
[op_index
] = op
= op_new
;
1429 } else if (have_op_new2
&& dead_temps
[args
[0]]
1430 && !mem_temps
[args
[0]]) {
1431 /* The low part of the operation is dead; generate the high. */
1432 s
->gen_opc_buf
[op_index
] = op
= op_new2
;
1439 assert(s
->gen_opc_buf
[op_index
+ 1] == INDEX_op_nop
);
1440 tcg_set_nop(s
, s
->gen_opc_buf
+ op_index
+ 1, args
+ 3, 1);
1441 /* Mark the single-word operation live. */
1446 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1447 args
-= def
->nb_args
;
1448 nb_iargs
= def
->nb_iargs
;
1449 nb_oargs
= def
->nb_oargs
;
1451 /* Test if the operation can be removed because all
1452 its outputs are dead. We assume that nb_oargs == 0
1453 implies side effects */
1454 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
1455 for(i
= 0; i
< nb_oargs
; i
++) {
1457 if (!dead_temps
[arg
] || mem_temps
[arg
]) {
1462 tcg_set_nop(s
, s
->gen_opc_buf
+ op_index
, args
, def
->nb_args
);
1463 #ifdef CONFIG_PROFILER
1469 /* output args are dead */
1472 for(i
= 0; i
< nb_oargs
; i
++) {
1474 if (dead_temps
[arg
]) {
1475 dead_args
|= (1 << i
);
1477 if (mem_temps
[arg
]) {
1478 sync_args
|= (1 << i
);
1480 dead_temps
[arg
] = 1;
1484 /* if end of basic block, update */
1485 if (def
->flags
& TCG_OPF_BB_END
) {
1486 tcg_la_bb_end(s
, dead_temps
, mem_temps
);
1487 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
1488 /* globals should be synced to memory */
1489 memset(mem_temps
, 1, s
->nb_globals
);
1492 /* input args are live */
1493 for(i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
1495 if (dead_temps
[arg
]) {
1496 dead_args
|= (1 << i
);
1498 dead_temps
[arg
] = 0;
1500 s
->op_dead_args
[op_index
] = dead_args
;
1501 s
->op_sync_args
[op_index
] = sync_args
;
1508 if (args
!= s
->gen_opparam_buf
) {
1513 /* dummy liveness analysis */
1514 static void tcg_liveness_analysis(TCGContext
*s
)
1517 nb_ops
= s
->gen_opc_ptr
- s
->gen_opc_buf
;
1519 s
->op_dead_args
= tcg_malloc(nb_ops
* sizeof(uint16_t));
1520 memset(s
->op_dead_args
, 0, nb_ops
* sizeof(uint16_t));
1521 s
->op_sync_args
= tcg_malloc(nb_ops
* sizeof(uint8_t));
1522 memset(s
->op_sync_args
, 0, nb_ops
* sizeof(uint8_t));
1527 static void dump_regs(TCGContext
*s
)
1533 for(i
= 0; i
< s
->nb_temps
; i
++) {
1535 printf(" %10s: ", tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), i
));
1536 switch(ts
->val_type
) {
1538 printf("%s", tcg_target_reg_names
[ts
->reg
]);
1541 printf("%d(%s)", (int)ts
->mem_offset
, tcg_target_reg_names
[ts
->mem_reg
]);
1543 case TEMP_VAL_CONST
:
1544 printf("$0x%" TCG_PRIlx
, ts
->val
);
1556 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
1557 if (s
->reg_to_temp
[i
] >= 0) {
1559 tcg_target_reg_names
[i
],
1560 tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
1565 static void check_regs(TCGContext
*s
)
1571 for(reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
1572 k
= s
->reg_to_temp
[reg
];
1575 if (ts
->val_type
!= TEMP_VAL_REG
||
1577 printf("Inconsistency for register %s:\n",
1578 tcg_target_reg_names
[reg
]);
1583 for(k
= 0; k
< s
->nb_temps
; k
++) {
1585 if (ts
->val_type
== TEMP_VAL_REG
&&
1587 s
->reg_to_temp
[ts
->reg
] != k
) {
1588 printf("Inconsistency for temp %s:\n",
1589 tcg_get_arg_str_idx(s
, buf
, sizeof(buf
), k
));
1591 printf("reg state:\n");
1599 static void temp_allocate_frame(TCGContext
*s
, int temp
)
1602 ts
= &s
->temps
[temp
];
1603 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1604 /* Sparc64 stack is accessed with offset of 2047 */
1605 s
->current_frame_offset
= (s
->current_frame_offset
+
1606 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
1607 ~(sizeof(tcg_target_long
) - 1);
1609 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
1613 ts
->mem_offset
= s
->current_frame_offset
;
1614 ts
->mem_reg
= s
->frame_reg
;
1615 ts
->mem_allocated
= 1;
1616 s
->current_frame_offset
+= (tcg_target_long
)sizeof(tcg_target_long
);
1619 /* sync register 'reg' by saving it to the corresponding temporary */
1620 static inline void tcg_reg_sync(TCGContext
*s
, int reg
)
1625 temp
= s
->reg_to_temp
[reg
];
1626 ts
= &s
->temps
[temp
];
1627 assert(ts
->val_type
== TEMP_VAL_REG
);
1628 if (!ts
->mem_coherent
&& !ts
->fixed_reg
) {
1629 if (!ts
->mem_allocated
) {
1630 temp_allocate_frame(s
, temp
);
1632 tcg_out_st(s
, ts
->type
, reg
, ts
->mem_reg
, ts
->mem_offset
);
1634 ts
->mem_coherent
= 1;
1637 /* free register 'reg' by spilling the corresponding temporary if necessary */
1638 static void tcg_reg_free(TCGContext
*s
, int reg
)
1642 temp
= s
->reg_to_temp
[reg
];
1644 tcg_reg_sync(s
, reg
);
1645 s
->temps
[temp
].val_type
= TEMP_VAL_MEM
;
1646 s
->reg_to_temp
[reg
] = -1;
1650 /* Allocate a register belonging to reg1 & ~reg2 */
1651 static int tcg_reg_alloc(TCGContext
*s
, TCGRegSet reg1
, TCGRegSet reg2
)
1656 tcg_regset_andnot(reg_ct
, reg1
, reg2
);
1658 /* first try free registers */
1659 for(i
= 0; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); i
++) {
1660 reg
= tcg_target_reg_alloc_order
[i
];
1661 if (tcg_regset_test_reg(reg_ct
, reg
) && s
->reg_to_temp
[reg
] == -1)
1665 /* XXX: do better spill choice */
1666 for(i
= 0; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); i
++) {
1667 reg
= tcg_target_reg_alloc_order
[i
];
1668 if (tcg_regset_test_reg(reg_ct
, reg
)) {
1669 tcg_reg_free(s
, reg
);
1677 /* mark a temporary as dead. */
1678 static inline void temp_dead(TCGContext
*s
, int temp
)
1682 ts
= &s
->temps
[temp
];
1683 if (!ts
->fixed_reg
) {
1684 if (ts
->val_type
== TEMP_VAL_REG
) {
1685 s
->reg_to_temp
[ts
->reg
] = -1;
1687 if (temp
< s
->nb_globals
|| ts
->temp_local
) {
1688 ts
->val_type
= TEMP_VAL_MEM
;
1690 ts
->val_type
= TEMP_VAL_DEAD
;
1695 /* sync a temporary to memory. 'allocated_regs' is used in case a
1696 temporary registers needs to be allocated to store a constant. */
1697 static inline void temp_sync(TCGContext
*s
, int temp
, TCGRegSet allocated_regs
)
1701 ts
= &s
->temps
[temp
];
1702 if (!ts
->fixed_reg
) {
1703 switch(ts
->val_type
) {
1704 case TEMP_VAL_CONST
:
1705 ts
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[ts
->type
],
1707 ts
->val_type
= TEMP_VAL_REG
;
1708 s
->reg_to_temp
[ts
->reg
] = temp
;
1709 ts
->mem_coherent
= 0;
1710 tcg_out_movi(s
, ts
->type
, ts
->reg
, ts
->val
);
1713 tcg_reg_sync(s
, ts
->reg
);
1724 /* save a temporary to memory. 'allocated_regs' is used in case a
1725 temporary registers needs to be allocated to store a constant. */
1726 static inline void temp_save(TCGContext
*s
, int temp
, TCGRegSet allocated_regs
)
1728 #ifdef USE_LIVENESS_ANALYSIS
1729 /* The liveness analysis already ensures that globals are back
1730 in memory. Keep an assert for safety. */
1731 assert(s
->temps
[temp
].val_type
== TEMP_VAL_MEM
|| s
->temps
[temp
].fixed_reg
);
1733 temp_sync(s
, temp
, allocated_regs
);
1738 /* save globals to their canonical location and assume they can be
1739 modified be the following code. 'allocated_regs' is used in case a
1740 temporary registers needs to be allocated to store a constant. */
1741 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
1745 for(i
= 0; i
< s
->nb_globals
; i
++) {
1746 temp_save(s
, i
, allocated_regs
);
1750 /* sync globals to their canonical location and assume they can be
1751 read by the following code. 'allocated_regs' is used in case a
1752 temporary registers needs to be allocated to store a constant. */
1753 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
1757 for (i
= 0; i
< s
->nb_globals
; i
++) {
1758 #ifdef USE_LIVENESS_ANALYSIS
1759 assert(s
->temps
[i
].val_type
!= TEMP_VAL_REG
|| s
->temps
[i
].fixed_reg
||
1760 s
->temps
[i
].mem_coherent
);
1762 temp_sync(s
, i
, allocated_regs
);
1767 /* at the end of a basic block, we assume all temporaries are dead and
1768 all globals are stored at their canonical location. */
1769 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
1774 for(i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
1776 if (ts
->temp_local
) {
1777 temp_save(s
, i
, allocated_regs
);
1779 #ifdef USE_LIVENESS_ANALYSIS
1780 /* The liveness analysis already ensures that temps are dead.
1781 Keep an assert for safety. */
1782 assert(ts
->val_type
== TEMP_VAL_DEAD
);
1789 save_globals(s
, allocated_regs
);
1792 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
1793 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
1795 static void tcg_reg_alloc_movi(TCGContext
*s
, const TCGArg
*args
,
1796 uint16_t dead_args
, uint8_t sync_args
)
1799 tcg_target_ulong val
;
1801 ots
= &s
->temps
[args
[0]];
1804 if (ots
->fixed_reg
) {
1805 /* for fixed registers, we do not do any constant
1807 tcg_out_movi(s
, ots
->type
, ots
->reg
, val
);
1809 /* The movi is not explicitly generated here */
1810 if (ots
->val_type
== TEMP_VAL_REG
)
1811 s
->reg_to_temp
[ots
->reg
] = -1;
1812 ots
->val_type
= TEMP_VAL_CONST
;
1815 if (NEED_SYNC_ARG(0)) {
1816 temp_sync(s
, args
[0], s
->reserved_regs
);
1818 if (IS_DEAD_ARG(0)) {
1819 temp_dead(s
, args
[0]);
1823 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOpDef
*def
,
1824 const TCGArg
*args
, uint16_t dead_args
,
1827 TCGRegSet allocated_regs
;
1829 const TCGArgConstraint
*arg_ct
, *oarg_ct
;
1831 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
1832 ots
= &s
->temps
[args
[0]];
1833 ts
= &s
->temps
[args
[1]];
1834 oarg_ct
= &def
->args_ct
[0];
1835 arg_ct
= &def
->args_ct
[1];
1837 /* If the source value is not in a register, and we're going to be
1838 forced to have it in a register in order to perform the copy,
1839 then copy the SOURCE value into its own register first. That way
1840 we don't have to reload SOURCE the next time it is used. */
1841 if (((NEED_SYNC_ARG(0) || ots
->fixed_reg
) && ts
->val_type
!= TEMP_VAL_REG
)
1842 || ts
->val_type
== TEMP_VAL_MEM
) {
1843 ts
->reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
1844 if (ts
->val_type
== TEMP_VAL_MEM
) {
1845 tcg_out_ld(s
, ts
->type
, ts
->reg
, ts
->mem_reg
, ts
->mem_offset
);
1846 ts
->mem_coherent
= 1;
1847 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
1848 tcg_out_movi(s
, ts
->type
, ts
->reg
, ts
->val
);
1850 s
->reg_to_temp
[ts
->reg
] = args
[1];
1851 ts
->val_type
= TEMP_VAL_REG
;
1854 if (IS_DEAD_ARG(0) && !ots
->fixed_reg
) {
1855 /* mov to a non-saved dead register makes no sense (even with
1856 liveness analysis disabled). */
1857 assert(NEED_SYNC_ARG(0));
1858 /* The code above should have moved the temp to a register. */
1859 assert(ts
->val_type
== TEMP_VAL_REG
);
1860 if (!ots
->mem_allocated
) {
1861 temp_allocate_frame(s
, args
[0]);
1863 tcg_out_st(s
, ots
->type
, ts
->reg
, ots
->mem_reg
, ots
->mem_offset
);
1864 if (IS_DEAD_ARG(1)) {
1865 temp_dead(s
, args
[1]);
1867 temp_dead(s
, args
[0]);
1868 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
1869 /* propagate constant */
1870 if (ots
->val_type
== TEMP_VAL_REG
) {
1871 s
->reg_to_temp
[ots
->reg
] = -1;
1873 ots
->val_type
= TEMP_VAL_CONST
;
1876 /* The code in the first if block should have moved the
1877 temp to a register. */
1878 assert(ts
->val_type
== TEMP_VAL_REG
);
1879 if (IS_DEAD_ARG(1) && !ts
->fixed_reg
&& !ots
->fixed_reg
) {
1880 /* the mov can be suppressed */
1881 if (ots
->val_type
== TEMP_VAL_REG
) {
1882 s
->reg_to_temp
[ots
->reg
] = -1;
1885 temp_dead(s
, args
[1]);
1887 if (ots
->val_type
!= TEMP_VAL_REG
) {
1888 /* When allocating a new register, make sure to not spill the
1890 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
1891 ots
->reg
= tcg_reg_alloc(s
, oarg_ct
->u
.regs
, allocated_regs
);
1893 tcg_out_mov(s
, ots
->type
, ots
->reg
, ts
->reg
);
1895 ots
->val_type
= TEMP_VAL_REG
;
1896 ots
->mem_coherent
= 0;
1897 s
->reg_to_temp
[ots
->reg
] = args
[0];
1898 if (NEED_SYNC_ARG(0)) {
1899 tcg_reg_sync(s
, ots
->reg
);
1904 static void tcg_reg_alloc_op(TCGContext
*s
,
1905 const TCGOpDef
*def
, TCGOpcode opc
,
1906 const TCGArg
*args
, uint16_t dead_args
,
1909 TCGRegSet allocated_regs
;
1910 int i
, k
, nb_iargs
, nb_oargs
, reg
;
1912 const TCGArgConstraint
*arg_ct
;
1914 TCGArg new_args
[TCG_MAX_OP_ARGS
];
1915 int const_args
[TCG_MAX_OP_ARGS
];
1917 nb_oargs
= def
->nb_oargs
;
1918 nb_iargs
= def
->nb_iargs
;
1920 /* copy constants */
1921 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
1922 args
+ nb_oargs
+ nb_iargs
,
1923 sizeof(TCGArg
) * def
->nb_cargs
);
1925 /* satisfy input constraints */
1926 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
1927 for(k
= 0; k
< nb_iargs
; k
++) {
1928 i
= def
->sorted_args
[nb_oargs
+ k
];
1930 arg_ct
= &def
->args_ct
[i
];
1931 ts
= &s
->temps
[arg
];
1932 if (ts
->val_type
== TEMP_VAL_MEM
) {
1933 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
1934 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_reg
, ts
->mem_offset
);
1935 ts
->val_type
= TEMP_VAL_REG
;
1937 ts
->mem_coherent
= 1;
1938 s
->reg_to_temp
[reg
] = arg
;
1939 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
1940 if (tcg_target_const_match(ts
->val
, arg_ct
)) {
1941 /* constant is OK for instruction */
1943 new_args
[i
] = ts
->val
;
1946 /* need to move to a register */
1947 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
1948 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
1949 ts
->val_type
= TEMP_VAL_REG
;
1951 ts
->mem_coherent
= 0;
1952 s
->reg_to_temp
[reg
] = arg
;
1955 assert(ts
->val_type
== TEMP_VAL_REG
);
1956 if (arg_ct
->ct
& TCG_CT_IALIAS
) {
1957 if (ts
->fixed_reg
) {
1958 /* if fixed register, we must allocate a new register
1959 if the alias is not the same register */
1960 if (arg
!= args
[arg_ct
->alias_index
])
1961 goto allocate_in_reg
;
1963 /* if the input is aliased to an output and if it is
1964 not dead after the instruction, we must allocate
1965 a new register and move it */
1966 if (!IS_DEAD_ARG(i
)) {
1967 goto allocate_in_reg
;
1972 if (tcg_regset_test_reg(arg_ct
->u
.regs
, reg
)) {
1973 /* nothing to do : the constraint is satisfied */
1976 /* allocate a new register matching the constraint
1977 and move the temporary register into it */
1978 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
1979 tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
);
1983 tcg_regset_set_reg(allocated_regs
, reg
);
1987 /* mark dead temporaries and free the associated registers */
1988 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
1989 if (IS_DEAD_ARG(i
)) {
1990 temp_dead(s
, args
[i
]);
1994 if (def
->flags
& TCG_OPF_BB_END
) {
1995 tcg_reg_alloc_bb_end(s
, allocated_regs
);
1997 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
1998 /* XXX: permit generic clobber register list ? */
1999 for(reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
2000 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, reg
)) {
2001 tcg_reg_free(s
, reg
);
2005 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2006 /* sync globals if the op has side effects and might trigger
2008 sync_globals(s
, allocated_regs
);
2011 /* satisfy the output constraints */
2012 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
2013 for(k
= 0; k
< nb_oargs
; k
++) {
2014 i
= def
->sorted_args
[k
];
2016 arg_ct
= &def
->args_ct
[i
];
2017 ts
= &s
->temps
[arg
];
2018 if (arg_ct
->ct
& TCG_CT_ALIAS
) {
2019 reg
= new_args
[arg_ct
->alias_index
];
2021 /* if fixed register, we try to use it */
2023 if (ts
->fixed_reg
&&
2024 tcg_regset_test_reg(arg_ct
->u
.regs
, reg
)) {
2027 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
2029 tcg_regset_set_reg(allocated_regs
, reg
);
2030 /* if a fixed register is used, then a move will be done afterwards */
2031 if (!ts
->fixed_reg
) {
2032 if (ts
->val_type
== TEMP_VAL_REG
) {
2033 s
->reg_to_temp
[ts
->reg
] = -1;
2035 ts
->val_type
= TEMP_VAL_REG
;
2037 /* temp value is modified, so the value kept in memory is
2038 potentially not the same */
2039 ts
->mem_coherent
= 0;
2040 s
->reg_to_temp
[reg
] = arg
;
2047 /* emit instruction */
2048 tcg_out_op(s
, opc
, new_args
, const_args
);
2050 /* move the outputs in the correct register if needed */
2051 for(i
= 0; i
< nb_oargs
; i
++) {
2052 ts
= &s
->temps
[args
[i
]];
2054 if (ts
->fixed_reg
&& ts
->reg
!= reg
) {
2055 tcg_out_mov(s
, ts
->type
, ts
->reg
, reg
);
2057 if (NEED_SYNC_ARG(i
)) {
2058 tcg_reg_sync(s
, reg
);
2060 if (IS_DEAD_ARG(i
)) {
2061 temp_dead(s
, args
[i
]);
2066 #ifdef TCG_TARGET_STACK_GROWSUP
2067 #define STACK_DIR(x) (-(x))
2069 #define STACK_DIR(x) (x)
2072 static int tcg_reg_alloc_call(TCGContext
*s
, const TCGOpDef
*def
,
2073 TCGOpcode opc
, const TCGArg
*args
,
2074 uint16_t dead_args
, uint8_t sync_args
)
2076 int nb_iargs
, nb_oargs
, flags
, nb_regs
, i
, reg
, nb_params
;
2077 TCGArg arg
, func_arg
;
2079 tcg_target_long stack_offset
, call_stack_size
, func_addr
;
2080 int const_func_arg
, allocate_args
;
2081 TCGRegSet allocated_regs
;
2082 const TCGArgConstraint
*arg_ct
;
2086 nb_oargs
= arg
>> 16;
2087 nb_iargs
= arg
& 0xffff;
2088 nb_params
= nb_iargs
- 1;
2090 flags
= args
[nb_oargs
+ nb_iargs
];
2092 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2093 if (nb_regs
> nb_params
)
2094 nb_regs
= nb_params
;
2096 /* assign stack slots first */
2097 call_stack_size
= (nb_params
- nb_regs
) * sizeof(tcg_target_long
);
2098 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
2099 ~(TCG_TARGET_STACK_ALIGN
- 1);
2100 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
2101 if (allocate_args
) {
2102 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2103 preallocate call stack */
2107 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
2108 for(i
= nb_regs
; i
< nb_params
; i
++) {
2109 arg
= args
[nb_oargs
+ i
];
2110 #ifdef TCG_TARGET_STACK_GROWSUP
2111 stack_offset
-= sizeof(tcg_target_long
);
2113 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2114 ts
= &s
->temps
[arg
];
2115 if (ts
->val_type
== TEMP_VAL_REG
) {
2116 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
2117 } else if (ts
->val_type
== TEMP_VAL_MEM
) {
2118 reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[ts
->type
],
2120 /* XXX: not correct if reading values from the stack */
2121 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_reg
, ts
->mem_offset
);
2122 tcg_out_st(s
, ts
->type
, reg
, TCG_REG_CALL_STACK
, stack_offset
);
2123 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
2124 reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[ts
->type
],
2126 /* XXX: sign extend may be needed on some targets */
2127 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
2128 tcg_out_st(s
, ts
->type
, reg
, TCG_REG_CALL_STACK
, stack_offset
);
2133 #ifndef TCG_TARGET_STACK_GROWSUP
2134 stack_offset
+= sizeof(tcg_target_long
);
2138 /* assign input registers */
2139 tcg_regset_set(allocated_regs
, s
->reserved_regs
);
2140 for(i
= 0; i
< nb_regs
; i
++) {
2141 arg
= args
[nb_oargs
+ i
];
2142 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2143 ts
= &s
->temps
[arg
];
2144 reg
= tcg_target_call_iarg_regs
[i
];
2145 tcg_reg_free(s
, reg
);
2146 if (ts
->val_type
== TEMP_VAL_REG
) {
2147 if (ts
->reg
!= reg
) {
2148 tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
);
2150 } else if (ts
->val_type
== TEMP_VAL_MEM
) {
2151 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_reg
, ts
->mem_offset
);
2152 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
2153 /* XXX: sign extend ? */
2154 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
2158 tcg_regset_set_reg(allocated_regs
, reg
);
2162 /* assign function address */
2163 func_arg
= args
[nb_oargs
+ nb_iargs
- 1];
2164 arg_ct
= &def
->args_ct
[0];
2165 ts
= &s
->temps
[func_arg
];
2166 func_addr
= ts
->val
;
2168 if (ts
->val_type
== TEMP_VAL_MEM
) {
2169 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
2170 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_reg
, ts
->mem_offset
);
2172 tcg_regset_set_reg(allocated_regs
, reg
);
2173 } else if (ts
->val_type
== TEMP_VAL_REG
) {
2175 if (!tcg_regset_test_reg(arg_ct
->u
.regs
, reg
)) {
2176 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
2177 tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
);
2180 tcg_regset_set_reg(allocated_regs
, reg
);
2181 } else if (ts
->val_type
== TEMP_VAL_CONST
) {
2182 if (tcg_target_const_match(func_addr
, arg_ct
)) {
2184 func_arg
= func_addr
;
2186 reg
= tcg_reg_alloc(s
, arg_ct
->u
.regs
, allocated_regs
);
2187 tcg_out_movi(s
, ts
->type
, reg
, func_addr
);
2189 tcg_regset_set_reg(allocated_regs
, reg
);
2196 /* mark dead temporaries and free the associated registers */
2197 for(i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2198 if (IS_DEAD_ARG(i
)) {
2199 temp_dead(s
, args
[i
]);
2203 /* clobber call registers */
2204 for(reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
2205 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, reg
)) {
2206 tcg_reg_free(s
, reg
);
2210 /* Save globals if they might be written by the helper, sync them if
2211 they might be read. */
2212 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
2214 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
2215 sync_globals(s
, allocated_regs
);
2217 save_globals(s
, allocated_regs
);
2220 tcg_out_op(s
, opc
, &func_arg
, &const_func_arg
);
2222 /* assign output registers and emit moves if needed */
2223 for(i
= 0; i
< nb_oargs
; i
++) {
2225 ts
= &s
->temps
[arg
];
2226 reg
= tcg_target_call_oarg_regs
[i
];
2227 assert(s
->reg_to_temp
[reg
] == -1);
2228 if (ts
->fixed_reg
) {
2229 if (ts
->reg
!= reg
) {
2230 tcg_out_mov(s
, ts
->type
, ts
->reg
, reg
);
2233 if (ts
->val_type
== TEMP_VAL_REG
) {
2234 s
->reg_to_temp
[ts
->reg
] = -1;
2236 ts
->val_type
= TEMP_VAL_REG
;
2238 ts
->mem_coherent
= 0;
2239 s
->reg_to_temp
[reg
] = arg
;
2240 if (NEED_SYNC_ARG(i
)) {
2241 tcg_reg_sync(s
, reg
);
2243 if (IS_DEAD_ARG(i
)) {
2244 temp_dead(s
, args
[i
]);
2249 return nb_iargs
+ nb_oargs
+ def
->nb_cargs
+ 1;
2252 #ifdef CONFIG_PROFILER
2254 static int64_t tcg_table_op_count
[NB_OPS
];
2256 static void dump_op_count(void)
2260 f
= fopen("/tmp/op.log", "w");
2261 for(i
= INDEX_op_end
; i
< NB_OPS
; i
++) {
2262 fprintf(f
, "%s %" PRId64
"\n", tcg_op_defs
[i
].name
, tcg_table_op_count
[i
]);
2269 static inline int tcg_gen_code_common(TCGContext
*s
, uint8_t *gen_code_buf
,
2274 const TCGOpDef
*def
;
2278 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
))) {
2285 #ifdef CONFIG_PROFILER
2286 s
->opt_time
-= profile_getclock();
2289 #ifdef USE_TCG_OPTIMIZATIONS
2290 s
->gen_opparam_ptr
=
2291 tcg_optimize(s
, s
->gen_opc_ptr
, s
->gen_opparam_buf
, tcg_op_defs
);
2294 #ifdef CONFIG_PROFILER
2295 s
->opt_time
+= profile_getclock();
2296 s
->la_time
-= profile_getclock();
2299 tcg_liveness_analysis(s
);
2301 #ifdef CONFIG_PROFILER
2302 s
->la_time
+= profile_getclock();
2306 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
))) {
2307 qemu_log("OP after optimization and liveness analysis:\n");
2313 tcg_reg_alloc_start(s
);
2315 s
->code_buf
= gen_code_buf
;
2316 s
->code_ptr
= gen_code_buf
;
2318 args
= s
->gen_opparam_buf
;
2322 opc
= s
->gen_opc_buf
[op_index
];
2323 #ifdef CONFIG_PROFILER
2324 tcg_table_op_count
[opc
]++;
2326 def
= &tcg_op_defs
[opc
];
2328 printf("%s: %d %d %d\n", def
->name
,
2329 def
->nb_oargs
, def
->nb_iargs
, def
->nb_cargs
);
2333 case INDEX_op_mov_i32
:
2334 case INDEX_op_mov_i64
:
2335 tcg_reg_alloc_mov(s
, def
, args
, s
->op_dead_args
[op_index
],
2336 s
->op_sync_args
[op_index
]);
2338 case INDEX_op_movi_i32
:
2339 case INDEX_op_movi_i64
:
2340 tcg_reg_alloc_movi(s
, args
, s
->op_dead_args
[op_index
],
2341 s
->op_sync_args
[op_index
]);
2343 case INDEX_op_debug_insn_start
:
2344 /* debug instruction */
2354 case INDEX_op_discard
:
2355 temp_dead(s
, args
[0]);
2357 case INDEX_op_set_label
:
2358 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
2359 tcg_out_label(s
, args
[0], s
->code_ptr
);
2362 args
+= tcg_reg_alloc_call(s
, def
, opc
, args
,
2363 s
->op_dead_args
[op_index
],
2364 s
->op_sync_args
[op_index
]);
2369 /* Sanity check that we've not introduced any unhandled opcodes. */
2370 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2373 /* Note: in order to speed up the code, it would be much
2374 faster to have specialized register allocator functions for
2375 some common argument patterns */
2376 tcg_reg_alloc_op(s
, def
, opc
, args
, s
->op_dead_args
[op_index
],
2377 s
->op_sync_args
[op_index
]);
2380 args
+= def
->nb_args
;
2382 if (search_pc
>= 0 && search_pc
< s
->code_ptr
- gen_code_buf
) {
2391 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
2392 /* Generate TB finalization at the end of block */
2393 tcg_out_tb_finalize(s
);
2398 int tcg_gen_code(TCGContext
*s
, uint8_t *gen_code_buf
)
2400 #ifdef CONFIG_PROFILER
2403 n
= (s
->gen_opc_ptr
- s
->gen_opc_buf
);
2405 if (n
> s
->op_count_max
)
2406 s
->op_count_max
= n
;
2408 s
->temp_count
+= s
->nb_temps
;
2409 if (s
->nb_temps
> s
->temp_count_max
)
2410 s
->temp_count_max
= s
->nb_temps
;
2414 tcg_gen_code_common(s
, gen_code_buf
, -1);
2416 /* flush instruction cache */
2417 flush_icache_range((uintptr_t)gen_code_buf
, (uintptr_t)s
->code_ptr
);
2419 return s
->code_ptr
- gen_code_buf
;
2422 /* Return the index of the micro operation such as the pc after is <
2423 offset bytes from the start of the TB. The contents of gen_code_buf must
2424 not be changed, though writing the same values is ok.
2425 Return -1 if not found. */
2426 int tcg_gen_code_search_pc(TCGContext
*s
, uint8_t *gen_code_buf
, long offset
)
2428 return tcg_gen_code_common(s
, gen_code_buf
, offset
);
2431 #ifdef CONFIG_PROFILER
2432 void tcg_dump_info(FILE *f
, fprintf_function cpu_fprintf
)
2434 TCGContext
*s
= &tcg_ctx
;
2437 tot
= s
->interm_time
+ s
->code_time
;
2438 cpu_fprintf(f
, "JIT cycles %" PRId64
" (%0.3f s at 2.4 GHz)\n",
2440 cpu_fprintf(f
, "translated TBs %" PRId64
" (aborted=%" PRId64
" %0.1f%%)\n",
2442 s
->tb_count1
- s
->tb_count
,
2443 s
->tb_count1
? (double)(s
->tb_count1
- s
->tb_count
) / s
->tb_count1
* 100.0 : 0);
2444 cpu_fprintf(f
, "avg ops/TB %0.1f max=%d\n",
2445 s
->tb_count
? (double)s
->op_count
/ s
->tb_count
: 0, s
->op_count_max
);
2446 cpu_fprintf(f
, "deleted ops/TB %0.2f\n",
2448 (double)s
->del_op_count
/ s
->tb_count
: 0);
2449 cpu_fprintf(f
, "avg temps/TB %0.2f max=%d\n",
2451 (double)s
->temp_count
/ s
->tb_count
: 0,
2454 cpu_fprintf(f
, "cycles/op %0.1f\n",
2455 s
->op_count
? (double)tot
/ s
->op_count
: 0);
2456 cpu_fprintf(f
, "cycles/in byte %0.1f\n",
2457 s
->code_in_len
? (double)tot
/ s
->code_in_len
: 0);
2458 cpu_fprintf(f
, "cycles/out byte %0.1f\n",
2459 s
->code_out_len
? (double)tot
/ s
->code_out_len
: 0);
2462 cpu_fprintf(f
, " gen_interm time %0.1f%%\n",
2463 (double)s
->interm_time
/ tot
* 100.0);
2464 cpu_fprintf(f
, " gen_code time %0.1f%%\n",
2465 (double)s
->code_time
/ tot
* 100.0);
2466 cpu_fprintf(f
, "optim./code time %0.1f%%\n",
2467 (double)s
->opt_time
/ (s
->code_time
? s
->code_time
: 1)
2469 cpu_fprintf(f
, "liveness/code time %0.1f%%\n",
2470 (double)s
->la_time
/ (s
->code_time
? s
->code_time
: 1) * 100.0);
2471 cpu_fprintf(f
, "cpu_restore count %" PRId64
"\n",
2473 cpu_fprintf(f
, " avg cycles %0.1f\n",
2474 s
->restore_count
? (double)s
->restore_time
/ s
->restore_count
: 0);
2479 void tcg_dump_info(FILE *f
, fprintf_function cpu_fprintf
)
2481 cpu_fprintf(f
, "[TCG profiler not compiled]\n");
2485 #ifdef ELF_HOST_MACHINE
2486 /* In order to use this feature, the backend needs to do three things:
2488 (1) Define ELF_HOST_MACHINE to indicate both what value to
2489 put into the ELF image and to indicate support for the feature.
2491 (2) Define tcg_register_jit. This should create a buffer containing
2492 the contents of a .debug_frame section that describes the post-
2493 prologue unwind info for the tcg machine.
2495 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2498 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
2505 struct jit_code_entry
{
2506 struct jit_code_entry
*next_entry
;
2507 struct jit_code_entry
*prev_entry
;
2508 const void *symfile_addr
;
2509 uint64_t symfile_size
;
2512 struct jit_descriptor
{
2514 uint32_t action_flag
;
2515 struct jit_code_entry
*relevant_entry
;
2516 struct jit_code_entry
*first_entry
;
2519 void __jit_debug_register_code(void) __attribute__((noinline
));
2520 void __jit_debug_register_code(void)
2525 /* Must statically initialize the version, because GDB may check
2526 the version before we can set it. */
2527 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
2529 /* End GDB interface. */
2531 static int find_string(const char *strtab
, const char *str
)
2533 const char *p
= strtab
+ 1;
2536 if (strcmp(p
, str
) == 0) {
2543 static void tcg_register_jit_int(void *buf_ptr
, size_t buf_size
,
2544 void *debug_frame
, size_t debug_frame_size
)
2546 struct __attribute__((packed
)) DebugInfo
{
2553 uintptr_t cu_low_pc
;
2554 uintptr_t cu_high_pc
;
2557 uintptr_t fn_low_pc
;
2558 uintptr_t fn_high_pc
;
2567 struct DebugInfo di
;
2572 struct ElfImage
*img
;
2574 static const struct ElfImage img_template
= {
2576 .e_ident
[EI_MAG0
] = ELFMAG0
,
2577 .e_ident
[EI_MAG1
] = ELFMAG1
,
2578 .e_ident
[EI_MAG2
] = ELFMAG2
,
2579 .e_ident
[EI_MAG3
] = ELFMAG3
,
2580 .e_ident
[EI_CLASS
] = ELF_CLASS
,
2581 .e_ident
[EI_DATA
] = ELF_DATA
,
2582 .e_ident
[EI_VERSION
] = EV_CURRENT
,
2584 .e_machine
= ELF_HOST_MACHINE
,
2585 .e_version
= EV_CURRENT
,
2586 .e_phoff
= offsetof(struct ElfImage
, phdr
),
2587 .e_shoff
= offsetof(struct ElfImage
, shdr
),
2588 .e_ehsize
= sizeof(ElfW(Shdr
)),
2589 .e_phentsize
= sizeof(ElfW(Phdr
)),
2591 .e_shentsize
= sizeof(ElfW(Shdr
)),
2592 .e_shnum
= ARRAY_SIZE(img
->shdr
),
2593 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
2594 #ifdef ELF_HOST_FLAGS
2595 .e_flags
= ELF_HOST_FLAGS
,
2598 .e_ident
[EI_OSABI
] = ELF_OSABI
,
2606 [0] = { .sh_type
= SHT_NULL
},
2607 /* Trick: The contents of code_gen_buffer are not present in
2608 this fake ELF file; that got allocated elsewhere. Therefore
2609 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2610 will not look for contents. We can record any address. */
2612 .sh_type
= SHT_NOBITS
,
2613 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
2615 [2] = { /* .debug_info */
2616 .sh_type
= SHT_PROGBITS
,
2617 .sh_offset
= offsetof(struct ElfImage
, di
),
2618 .sh_size
= sizeof(struct DebugInfo
),
2620 [3] = { /* .debug_abbrev */
2621 .sh_type
= SHT_PROGBITS
,
2622 .sh_offset
= offsetof(struct ElfImage
, da
),
2623 .sh_size
= sizeof(img
->da
),
2625 [4] = { /* .debug_frame */
2626 .sh_type
= SHT_PROGBITS
,
2627 .sh_offset
= sizeof(struct ElfImage
),
2629 [5] = { /* .symtab */
2630 .sh_type
= SHT_SYMTAB
,
2631 .sh_offset
= offsetof(struct ElfImage
, sym
),
2632 .sh_size
= sizeof(img
->sym
),
2634 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
2635 .sh_entsize
= sizeof(ElfW(Sym
)),
2637 [6] = { /* .strtab */
2638 .sh_type
= SHT_STRTAB
,
2639 .sh_offset
= offsetof(struct ElfImage
, str
),
2640 .sh_size
= sizeof(img
->str
),
2644 [1] = { /* code_gen_buffer */
2645 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
2650 .len
= sizeof(struct DebugInfo
) - 4,
2652 .ptr_size
= sizeof(void *),
2654 .cu_lang
= 0x8001, /* DW_LANG_Mips_Assembler */
2656 .fn_name
= "code_gen_buffer"
2659 1, /* abbrev number (the cu) */
2660 0x11, 1, /* DW_TAG_compile_unit, has children */
2661 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2662 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2663 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2664 0, 0, /* end of abbrev */
2665 2, /* abbrev number (the fn) */
2666 0x2e, 0, /* DW_TAG_subprogram, no children */
2667 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2668 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2669 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2670 0, 0, /* end of abbrev */
2671 0 /* no more abbrev */
2673 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2674 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2677 /* We only need a single jit entry; statically allocate it. */
2678 static struct jit_code_entry one_entry
;
2680 uintptr_t buf
= (uintptr_t)buf_ptr
;
2681 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
2683 img
= g_malloc(img_size
);
2684 *img
= img_template
;
2685 memcpy(img
+ 1, debug_frame
, debug_frame_size
);
2687 img
->phdr
.p_vaddr
= buf
;
2688 img
->phdr
.p_paddr
= buf
;
2689 img
->phdr
.p_memsz
= buf_size
;
2691 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
2692 img
->shdr
[1].sh_addr
= buf
;
2693 img
->shdr
[1].sh_size
= buf_size
;
2695 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
2696 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
2698 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
2699 img
->shdr
[4].sh_size
= debug_frame_size
;
2701 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
2702 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
2704 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
2705 img
->sym
[1].st_value
= buf
;
2706 img
->sym
[1].st_size
= buf_size
;
2708 img
->di
.cu_low_pc
= buf
;
2709 img
->di
.cu_high_pc
= buf
+ buf_size
;
2710 img
->di
.fn_low_pc
= buf
;
2711 img
->di
.fn_high_pc
= buf
+ buf_size
;
2714 /* Enable this block to be able to debug the ELF image file creation.
2715 One can use readelf, objdump, or other inspection utilities. */
2717 FILE *f
= fopen("/tmp/qemu.jit", "w+b");
2719 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
2720 /* Avoid stupid unused return value warning for fwrite. */
2727 one_entry
.symfile_addr
= img
;
2728 one_entry
.symfile_size
= img_size
;
2730 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
2731 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
2732 __jit_debug_descriptor
.first_entry
= &one_entry
;
2733 __jit_debug_register_code();
2736 /* No support for the feature. Provide the entry point expected by exec.c,
2737 and implement the internal function we declared earlier. */
2739 static void tcg_register_jit_int(void *buf
, size_t size
,
2740 void *debug_frame
, size_t debug_frame_size
)
2744 void tcg_register_jit(void *buf
, size_t buf_size
)
2747 #endif /* ELF_HOST_MACHINE */