2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
43 #define NO_CPU_IO_DEFS
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
57 # define ELF_CLASS ELFCLASS64
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
62 # define ELF_DATA ELFDATA2LSB
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
71 static void tcg_target_init(TCGContext
*s
);
72 static const TCGTargetOpDef
*tcg_target_op_def(TCGOpcode
);
73 static void tcg_target_qemu_prologue(TCGContext
*s
);
74 static bool patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
75 intptr_t value
, intptr_t addend
);
77 /* The CIE and FDE header definitions will be common to all hosts. */
79 uint32_t len
__attribute__((aligned((sizeof(void *)))));
85 uint8_t return_column
;
88 typedef struct QEMU_PACKED
{
89 uint32_t len
__attribute__((aligned((sizeof(void *)))));
93 } DebugFrameFDEHeader
;
95 typedef struct QEMU_PACKED
{
97 DebugFrameFDEHeader fde
;
100 static void tcg_register_jit_int(const void *buf
, size_t size
,
101 const void *debug_frame
,
102 size_t debug_frame_size
)
103 __attribute__((unused
));
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint
*ct
,
107 const char *ct_str
, TCGType type
);
108 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
110 static bool tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
111 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
112 TCGReg ret
, tcg_target_long arg
);
113 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
, const TCGArg
*args
,
114 const int *const_args
);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
117 TCGReg dst
, TCGReg src
);
118 static bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
119 TCGReg dst
, TCGReg base
, intptr_t offset
);
120 static void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
121 TCGReg dst
, int64_t arg
);
122 static void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
, unsigned vecl
,
123 unsigned vece
, const TCGArg
*args
,
124 const int *const_args
);
126 static inline bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
127 TCGReg dst
, TCGReg src
)
129 g_assert_not_reached();
131 static inline bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
132 TCGReg dst
, TCGReg base
, intptr_t offset
)
134 g_assert_not_reached();
136 static inline void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
137 TCGReg dst
, int64_t arg
)
139 g_assert_not_reached();
141 static inline void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
, unsigned vecl
,
142 unsigned vece
, const TCGArg
*args
,
143 const int *const_args
)
145 g_assert_not_reached();
148 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
150 static bool tcg_out_sti(TCGContext
*s
, TCGType type
, TCGArg val
,
151 TCGReg base
, intptr_t ofs
);
152 static void tcg_out_call(TCGContext
*s
, const tcg_insn_unit
*target
);
153 static int tcg_target_const_match(tcg_target_long val
, TCGType type
,
154 const TCGArgConstraint
*arg_ct
);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext
*s
);
159 #define TCG_HIGHWATER 1024
161 static TCGContext
**tcg_ctxs
;
162 static unsigned int n_tcg_ctxs
;
163 TCGv_env cpu_env
= 0;
164 const void *tcg_code_gen_epilogue
;
165 uintptr_t tcg_splitwx_diff
;
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn
*tcg_qemu_tb_exec
;
171 struct tcg_region_tree
{
174 /* padding to avoid false sharing is computed at run-time */
178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179 * dynamically allocate from as demand dictates. Given appropriate region
180 * sizing, this minimizes flushes even when some TCG threads generate a lot
181 * more code than others.
183 struct tcg_region_state
{
186 /* fields set at init time */
191 size_t size
; /* size of one region */
192 size_t stride
; /* .size + guard size */
194 /* fields protected by the lock */
195 size_t current
; /* current region index */
196 size_t agg_size_full
; /* aggregate size of full regions */
199 static struct tcg_region_state region
;
201 * This is an array of struct tcg_region_tree's, with padding.
202 * We use void * to simplify the computation of region_trees[i]; each
203 * struct is found every tree_size bytes.
205 static void *region_trees
;
206 static size_t tree_size
;
207 static TCGRegSet tcg_target_available_regs
[TCG_TYPE_COUNT
];
208 static TCGRegSet tcg_target_call_clobber_regs
;
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused
)) inline void tcg_out8(TCGContext
*s
, uint8_t v
)
216 static __attribute__((unused
)) inline void tcg_patch8(tcg_insn_unit
*p
,
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused
)) inline void tcg_out16(TCGContext
*s
, uint16_t v
)
226 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
229 tcg_insn_unit
*p
= s
->code_ptr
;
230 memcpy(p
, &v
, sizeof(v
));
231 s
->code_ptr
= p
+ (2 / TCG_TARGET_INSN_UNIT_SIZE
);
235 static __attribute__((unused
)) inline void tcg_patch16(tcg_insn_unit
*p
,
238 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
241 memcpy(p
, &v
, sizeof(v
));
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused
)) inline void tcg_out32(TCGContext
*s
, uint32_t v
)
249 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
252 tcg_insn_unit
*p
= s
->code_ptr
;
253 memcpy(p
, &v
, sizeof(v
));
254 s
->code_ptr
= p
+ (4 / TCG_TARGET_INSN_UNIT_SIZE
);
258 static __attribute__((unused
)) inline void tcg_patch32(tcg_insn_unit
*p
,
261 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
264 memcpy(p
, &v
, sizeof(v
));
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused
)) inline void tcg_out64(TCGContext
*s
, uint64_t v
)
272 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
275 tcg_insn_unit
*p
= s
->code_ptr
;
276 memcpy(p
, &v
, sizeof(v
));
277 s
->code_ptr
= p
+ (8 / TCG_TARGET_INSN_UNIT_SIZE
);
281 static __attribute__((unused
)) inline void tcg_patch64(tcg_insn_unit
*p
,
284 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
287 memcpy(p
, &v
, sizeof(v
));
292 /* label relocation processing */
294 static void tcg_out_reloc(TCGContext
*s
, tcg_insn_unit
*code_ptr
, int type
,
295 TCGLabel
*l
, intptr_t addend
)
297 TCGRelocation
*r
= tcg_malloc(sizeof(TCGRelocation
));
302 QSIMPLEQ_INSERT_TAIL(&l
->relocs
, r
, next
);
305 static void tcg_out_label(TCGContext
*s
, TCGLabel
*l
)
307 tcg_debug_assert(!l
->has_value
);
309 l
->u
.value_ptr
= tcg_splitwx_to_rx(s
->code_ptr
);
312 TCGLabel
*gen_new_label(void)
314 TCGContext
*s
= tcg_ctx
;
315 TCGLabel
*l
= tcg_malloc(sizeof(TCGLabel
));
317 memset(l
, 0, sizeof(TCGLabel
));
318 l
->id
= s
->nb_labels
++;
319 QSIMPLEQ_INIT(&l
->relocs
);
321 QSIMPLEQ_INSERT_TAIL(&s
->labels
, l
, next
);
326 static bool tcg_resolve_relocs(TCGContext
*s
)
330 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
332 uintptr_t value
= l
->u
.value
;
334 QSIMPLEQ_FOREACH(r
, &l
->relocs
, next
) {
335 if (!patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
)) {
343 static void set_jmp_reset_offset(TCGContext
*s
, int which
)
346 * We will check for overflow at the end of the opcode loop in
347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
349 s
->tb_jmp_reset_offset
[which
] = tcg_current_code_size(s
);
352 #include "tcg-target.c.inc"
354 /* compare a pointer @ptr and a tb_tc @s */
355 static int ptr_cmp_tb_tc(const void *ptr
, const struct tb_tc
*s
)
357 if (ptr
>= s
->ptr
+ s
->size
) {
359 } else if (ptr
< s
->ptr
) {
365 static gint
tb_tc_cmp(gconstpointer ap
, gconstpointer bp
)
367 const struct tb_tc
*a
= ap
;
368 const struct tb_tc
*b
= bp
;
371 * When both sizes are set, we know this isn't a lookup.
372 * This is the most likely case: every TB must be inserted; lookups
373 * are a lot less frequent.
375 if (likely(a
->size
&& b
->size
)) {
376 if (a
->ptr
> b
->ptr
) {
378 } else if (a
->ptr
< b
->ptr
) {
381 /* a->ptr == b->ptr should happen only on deletions */
382 g_assert(a
->size
== b
->size
);
386 * All lookups have either .size field set to 0.
387 * From the glib sources we see that @ap is always the lookup key. However
388 * the docs provide no guarantee, so we just mark this case as likely.
390 if (likely(a
->size
== 0)) {
391 return ptr_cmp_tb_tc(a
->ptr
, b
);
393 return ptr_cmp_tb_tc(b
->ptr
, a
);
396 static void tcg_region_trees_init(void)
400 tree_size
= ROUND_UP(sizeof(struct tcg_region_tree
), qemu_dcache_linesize
);
401 region_trees
= qemu_memalign(qemu_dcache_linesize
, region
.n
* tree_size
);
402 for (i
= 0; i
< region
.n
; i
++) {
403 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
405 qemu_mutex_init(&rt
->lock
);
406 rt
->tree
= g_tree_new(tb_tc_cmp
);
410 static struct tcg_region_tree
*tc_ptr_to_region_tree(const void *cp
)
412 void *p
= tcg_splitwx_to_rw(cp
);
415 if (p
< region
.start_aligned
) {
418 ptrdiff_t offset
= p
- region
.start_aligned
;
420 if (offset
> region
.stride
* (region
.n
- 1)) {
421 region_idx
= region
.n
- 1;
423 region_idx
= offset
/ region
.stride
;
426 return region_trees
+ region_idx
* tree_size
;
429 void tcg_tb_insert(TranslationBlock
*tb
)
431 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree(tb
->tc
.ptr
);
433 qemu_mutex_lock(&rt
->lock
);
434 g_tree_insert(rt
->tree
, &tb
->tc
, tb
);
435 qemu_mutex_unlock(&rt
->lock
);
438 void tcg_tb_remove(TranslationBlock
*tb
)
440 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree(tb
->tc
.ptr
);
442 qemu_mutex_lock(&rt
->lock
);
443 g_tree_remove(rt
->tree
, &tb
->tc
);
444 qemu_mutex_unlock(&rt
->lock
);
448 * Find the TB 'tb' such that
449 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
450 * Return NULL if not found.
452 TranslationBlock
*tcg_tb_lookup(uintptr_t tc_ptr
)
454 struct tcg_region_tree
*rt
= tc_ptr_to_region_tree((void *)tc_ptr
);
455 TranslationBlock
*tb
;
456 struct tb_tc s
= { .ptr
= (void *)tc_ptr
};
458 qemu_mutex_lock(&rt
->lock
);
459 tb
= g_tree_lookup(rt
->tree
, &s
);
460 qemu_mutex_unlock(&rt
->lock
);
464 static void tcg_region_tree_lock_all(void)
468 for (i
= 0; i
< region
.n
; i
++) {
469 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
471 qemu_mutex_lock(&rt
->lock
);
475 static void tcg_region_tree_unlock_all(void)
479 for (i
= 0; i
< region
.n
; i
++) {
480 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
482 qemu_mutex_unlock(&rt
->lock
);
486 void tcg_tb_foreach(GTraverseFunc func
, gpointer user_data
)
490 tcg_region_tree_lock_all();
491 for (i
= 0; i
< region
.n
; i
++) {
492 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
494 g_tree_foreach(rt
->tree
, func
, user_data
);
496 tcg_region_tree_unlock_all();
499 size_t tcg_nb_tbs(void)
504 tcg_region_tree_lock_all();
505 for (i
= 0; i
< region
.n
; i
++) {
506 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
508 nb_tbs
+= g_tree_nnodes(rt
->tree
);
510 tcg_region_tree_unlock_all();
514 static gboolean
tcg_region_tree_traverse(gpointer k
, gpointer v
, gpointer data
)
516 TranslationBlock
*tb
= v
;
522 static void tcg_region_tree_reset_all(void)
526 tcg_region_tree_lock_all();
527 for (i
= 0; i
< region
.n
; i
++) {
528 struct tcg_region_tree
*rt
= region_trees
+ i
* tree_size
;
530 g_tree_foreach(rt
->tree
, tcg_region_tree_traverse
, NULL
);
531 /* Increment the refcount first so that destroy acts as a reset */
532 g_tree_ref(rt
->tree
);
533 g_tree_destroy(rt
->tree
);
535 tcg_region_tree_unlock_all();
538 static void tcg_region_bounds(size_t curr_region
, void **pstart
, void **pend
)
542 start
= region
.start_aligned
+ curr_region
* region
.stride
;
543 end
= start
+ region
.size
;
545 if (curr_region
== 0) {
546 start
= region
.start
;
548 if (curr_region
== region
.n
- 1) {
556 static void tcg_region_assign(TCGContext
*s
, size_t curr_region
)
560 tcg_region_bounds(curr_region
, &start
, &end
);
562 s
->code_gen_buffer
= start
;
563 s
->code_gen_ptr
= start
;
564 s
->code_gen_buffer_size
= end
- start
;
565 s
->code_gen_highwater
= end
- TCG_HIGHWATER
;
568 static bool tcg_region_alloc__locked(TCGContext
*s
)
570 if (region
.current
== region
.n
) {
573 tcg_region_assign(s
, region
.current
);
579 * Request a new region once the one in use has filled up.
580 * Returns true on error.
582 static bool tcg_region_alloc(TCGContext
*s
)
585 /* read the region size now; alloc__locked will overwrite it on success */
586 size_t size_full
= s
->code_gen_buffer_size
;
588 qemu_mutex_lock(®ion
.lock
);
589 err
= tcg_region_alloc__locked(s
);
591 region
.agg_size_full
+= size_full
- TCG_HIGHWATER
;
593 qemu_mutex_unlock(®ion
.lock
);
598 * Perform a context's first region allocation.
599 * This function does _not_ increment region.agg_size_full.
601 static inline bool tcg_region_initial_alloc__locked(TCGContext
*s
)
603 return tcg_region_alloc__locked(s
);
606 /* Call from a safe-work context */
607 void tcg_region_reset_all(void)
609 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
612 qemu_mutex_lock(®ion
.lock
);
614 region
.agg_size_full
= 0;
616 for (i
= 0; i
< n_ctxs
; i
++) {
617 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
618 bool err
= tcg_region_initial_alloc__locked(s
);
622 qemu_mutex_unlock(®ion
.lock
);
624 tcg_region_tree_reset_all();
627 #ifdef CONFIG_USER_ONLY
628 static size_t tcg_n_regions(void)
634 * It is likely that some vCPUs will translate more code than others, so we
635 * first try to set more regions than max_cpus, with those regions being of
636 * reasonable size. If that's not possible we make do by evenly dividing
637 * the code_gen_buffer among the vCPUs.
639 static size_t tcg_n_regions(void)
643 /* Use a single region if all we have is one vCPU thread */
644 #if !defined(CONFIG_USER_ONLY)
645 MachineState
*ms
= MACHINE(qdev_get_machine());
646 unsigned int max_cpus
= ms
->smp
.max_cpus
;
648 if (max_cpus
== 1 || !qemu_tcg_mttcg_enabled()) {
652 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
653 for (i
= 8; i
> 0; i
--) {
654 size_t regions_per_thread
= i
;
657 region_size
= tcg_init_ctx
.code_gen_buffer_size
;
658 region_size
/= max_cpus
* regions_per_thread
;
660 if (region_size
>= 2 * 1024u * 1024) {
661 return max_cpus
* regions_per_thread
;
664 /* If we can't, then just allocate one region per vCPU thread */
670 * Initializes region partitioning.
672 * Called at init time from the parent thread (i.e. the one calling
673 * tcg_context_init), after the target's TCG globals have been set.
675 * Region partitioning works by splitting code_gen_buffer into separate regions,
676 * and then assigning regions to TCG threads so that the threads can translate
677 * code in parallel without synchronization.
679 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
680 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
681 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
682 * must have been parsed before calling this function, since it calls
683 * qemu_tcg_mttcg_enabled().
685 * In user-mode we use a single region. Having multiple regions in user-mode
686 * is not supported, because the number of vCPU threads (recall that each thread
687 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
688 * OS, and usually this number is huge (tens of thousands is not uncommon).
689 * Thus, given this large bound on the number of vCPU threads and the fact
690 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
691 * that the availability of at least one region per vCPU thread.
693 * However, this user-mode limitation is unlikely to be a significant problem
694 * in practice. Multi-threaded guests share most if not all of their translated
695 * code, which makes parallel code generation less appealing than in softmmu.
697 void tcg_region_init(void)
699 void *buf
= tcg_init_ctx
.code_gen_buffer
;
701 size_t size
= tcg_init_ctx
.code_gen_buffer_size
;
702 size_t page_size
= qemu_real_host_page_size
;
706 uintptr_t splitwx_diff
;
708 n_regions
= tcg_n_regions();
710 /* The first region will be 'aligned - buf' bytes larger than the others */
711 aligned
= QEMU_ALIGN_PTR_UP(buf
, page_size
);
712 g_assert(aligned
< tcg_init_ctx
.code_gen_buffer
+ size
);
714 * Make region_size a multiple of page_size, using aligned as the start.
715 * As a result of this we might end up with a few extra pages at the end of
716 * the buffer; we will assign those to the last region.
718 region_size
= (size
- (aligned
- buf
)) / n_regions
;
719 region_size
= QEMU_ALIGN_DOWN(region_size
, page_size
);
721 /* A region must have at least 2 pages; one code, one guard */
722 g_assert(region_size
>= 2 * page_size
);
724 /* init the region struct */
725 qemu_mutex_init(®ion
.lock
);
726 region
.n
= n_regions
;
727 region
.size
= region_size
- page_size
;
728 region
.stride
= region_size
;
730 region
.start_aligned
= aligned
;
731 /* page-align the end, since its last page will be a guard page */
732 region
.end
= QEMU_ALIGN_PTR_DOWN(buf
+ size
, page_size
);
733 /* account for that last guard page */
734 region
.end
-= page_size
;
736 /* set guard pages */
737 splitwx_diff
= tcg_splitwx_diff
;
738 for (i
= 0; i
< region
.n
; i
++) {
742 tcg_region_bounds(i
, &start
, &end
);
743 rc
= qemu_mprotect_none(end
, page_size
);
746 rc
= qemu_mprotect_none(end
+ splitwx_diff
, page_size
);
751 tcg_region_trees_init();
753 /* In user-mode we support only one ctx, so do the initial allocation now */
754 #ifdef CONFIG_USER_ONLY
756 bool err
= tcg_region_initial_alloc__locked(tcg_ctx
);
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug variants of the split-WX view converters: validate that the input
 * pointer lies inside the code_gen_buffer before applying the rw<->rx
 * displacement.  NULL passes through unchanged in both directions.
 */
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
786 static void alloc_tcg_plugin_context(TCGContext
*s
)
789 s
->plugin_tb
= g_new0(struct qemu_plugin_tb
, 1);
790 s
->plugin_tb
->insns
=
791 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn
);
796 * All TCG threads except the parent (i.e. the one that called tcg_context_init
797 * and registered the target's TCG globals) must register with this function
798 * before initiating translation.
800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801 * of tcg_region_init() for the reasoning behind this.
803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
805 * is not used anymore for translation once this function is called.
807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
808 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
810 #ifdef CONFIG_USER_ONLY
811 void tcg_register_thread(void)
813 tcg_ctx
= &tcg_init_ctx
;
816 void tcg_register_thread(void)
818 MachineState
*ms
= MACHINE(qdev_get_machine());
819 TCGContext
*s
= g_malloc(sizeof(*s
));
825 /* Relink mem_base. */
826 for (i
= 0, n
= tcg_init_ctx
.nb_globals
; i
< n
; ++i
) {
827 if (tcg_init_ctx
.temps
[i
].mem_base
) {
828 ptrdiff_t b
= tcg_init_ctx
.temps
[i
].mem_base
- tcg_init_ctx
.temps
;
829 tcg_debug_assert(b
>= 0 && b
< n
);
830 s
->temps
[i
].mem_base
= &s
->temps
[b
];
834 /* Claim an entry in tcg_ctxs */
835 n
= qatomic_fetch_inc(&n_tcg_ctxs
);
836 g_assert(n
< ms
->smp
.max_cpus
);
837 qatomic_set(&tcg_ctxs
[n
], s
);
840 alloc_tcg_plugin_context(s
);
844 qemu_mutex_lock(®ion
.lock
);
845 err
= tcg_region_initial_alloc__locked(tcg_ctx
);
847 qemu_mutex_unlock(®ion
.lock
);
849 #endif /* !CONFIG_USER_ONLY */
852 * Returns the size (in bytes) of all translated code (i.e. from all regions)
853 * currently in the cache.
854 * See also: tcg_code_capacity()
855 * Do not confuse with tcg_current_code_size(); that one applies to a single
858 size_t tcg_code_size(void)
860 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
864 qemu_mutex_lock(®ion
.lock
);
865 total
= region
.agg_size_full
;
866 for (i
= 0; i
< n_ctxs
; i
++) {
867 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
870 size
= qatomic_read(&s
->code_gen_ptr
) - s
->code_gen_buffer
;
871 g_assert(size
<= s
->code_gen_buffer_size
);
874 qemu_mutex_unlock(®ion
.lock
);
879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
881 * See also: tcg_code_size()
883 size_t tcg_code_capacity(void)
885 size_t guard_size
, capacity
;
887 /* no need for synchronization; these variables are set at init time */
888 guard_size
= region
.stride
- region
.size
;
889 capacity
= region
.end
+ guard_size
- region
.start
;
890 capacity
-= region
.n
* (guard_size
+ TCG_HIGHWATER
);
894 size_t tcg_tb_phys_invalidate_count(void)
896 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
900 for (i
= 0; i
< n_ctxs
; i
++) {
901 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
903 total
+= qatomic_read(&s
->tb_phys_invalidate_count
);
908 /* pool based memory allocation */
909 void *tcg_malloc_internal(TCGContext
*s
, int size
)
914 if (size
> TCG_POOL_CHUNK_SIZE
) {
915 /* big malloc: insert a new pool (XXX: could optimize) */
916 p
= g_malloc(sizeof(TCGPool
) + size
);
918 p
->next
= s
->pool_first_large
;
919 s
->pool_first_large
= p
;
930 pool_size
= TCG_POOL_CHUNK_SIZE
;
931 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
935 s
->pool_current
->next
= p
;
944 s
->pool_cur
= p
->data
+ size
;
945 s
->pool_end
= p
->data
+ p
->size
;
949 void tcg_pool_reset(TCGContext
*s
)
952 for (p
= s
->pool_first_large
; p
; p
= t
) {
956 s
->pool_first_large
= NULL
;
957 s
->pool_cur
= s
->pool_end
= NULL
;
958 s
->pool_current
= NULL
;
961 typedef struct TCGHelperInfo
{
968 #include "exec/helper-proto.h"
970 static const TCGHelperInfo all_helpers
[] = {
971 #include "exec/helper-tcg.h"
973 static GHashTable
*helper_table
;
975 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
976 static void process_op_defs(TCGContext
*s
);
977 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
978 TCGReg reg
, const char *name
);
980 void tcg_context_init(TCGContext
*s
)
982 int op
, total_args
, n
, i
;
984 TCGArgConstraint
*args_ct
;
987 memset(s
, 0, sizeof(*s
));
990 /* Count total number of arguments and allocate the corresponding
993 for(op
= 0; op
< NB_OPS
; op
++) {
994 def
= &tcg_op_defs
[op
];
995 n
= def
->nb_iargs
+ def
->nb_oargs
;
999 args_ct
= g_new0(TCGArgConstraint
, total_args
);
1001 for(op
= 0; op
< NB_OPS
; op
++) {
1002 def
= &tcg_op_defs
[op
];
1003 def
->args_ct
= args_ct
;
1004 n
= def
->nb_iargs
+ def
->nb_oargs
;
1008 /* Register helpers. */
1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1010 helper_table
= g_hash_table_new(NULL
, NULL
);
1012 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
1013 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
1014 (gpointer
)&all_helpers
[i
]);
1020 /* Reverse the order of the saved registers, assuming they're all at
1021 the start of tcg_target_reg_alloc_order. */
1022 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
1023 int r
= tcg_target_reg_alloc_order
[n
];
1024 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
1028 for (i
= 0; i
< n
; ++i
) {
1029 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
1031 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
1032 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
1035 alloc_tcg_plugin_context(s
);
1039 * In user-mode we simply share the init context among threads, since we
1040 * use a single region. See the documentation tcg_region_init() for the
1041 * reasoning behind this.
1042 * In softmmu we will have at most max_cpus TCG threads.
1044 #ifdef CONFIG_USER_ONLY
1045 tcg_ctxs
= &tcg_ctx
;
1048 MachineState
*ms
= MACHINE(qdev_get_machine());
1049 unsigned int max_cpus
= ms
->smp
.max_cpus
;
1050 tcg_ctxs
= g_new(TCGContext
*, max_cpus
);
1053 tcg_debug_assert(!tcg_regset_test_reg(s
->reserved_regs
, TCG_AREG0
));
1054 ts
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, TCG_AREG0
, "env");
1055 cpu_env
= temp_tcgv_ptr(ts
);
1059 * Allocate TBs right before their corresponding translated code, making
1060 * sure that TBs and code are on different cache lines.
1062 TranslationBlock
*tcg_tb_alloc(TCGContext
*s
)
1064 uintptr_t align
= qemu_icache_linesize
;
1065 TranslationBlock
*tb
;
1069 tb
= (void *)ROUND_UP((uintptr_t)s
->code_gen_ptr
, align
);
1070 next
= (void *)ROUND_UP((uintptr_t)(tb
+ 1), align
);
1072 if (unlikely(next
> s
->code_gen_highwater
)) {
1073 if (tcg_region_alloc(s
)) {
1078 qatomic_set(&s
->code_gen_ptr
, next
);
1079 s
->data_gen_ptr
= NULL
;
1083 void tcg_prologue_init(TCGContext
*s
)
1085 size_t prologue_size
, total_size
;
1088 /* Put the prologue at the beginning of code_gen_buffer. */
1089 buf0
= s
->code_gen_buffer
;
1090 total_size
= s
->code_gen_buffer_size
;
1093 s
->data_gen_ptr
= NULL
;
1096 * The region trees are not yet configured, but tcg_splitwx_to_rx
1097 * needs the bounds for an assert.
1099 region
.start
= buf0
;
1100 region
.end
= buf0
+ total_size
;
1102 #ifndef CONFIG_TCG_INTERPRETER
1103 tcg_qemu_tb_exec
= (tcg_prologue_fn
*)tcg_splitwx_to_rx(buf0
);
1106 /* Compute a high-water mark, at which we voluntarily flush the buffer
1107 and start over. The size here is arbitrary, significantly larger
1108 than we expect the code generation for any one opcode to require. */
1109 s
->code_gen_highwater
= s
->code_gen_buffer
+ (total_size
- TCG_HIGHWATER
);
1111 #ifdef TCG_TARGET_NEED_POOL_LABELS
1112 s
->pool_labels
= NULL
;
1115 /* Generate the prologue. */
1116 tcg_target_qemu_prologue(s
);
1118 #ifdef TCG_TARGET_NEED_POOL_LABELS
1119 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1121 int result
= tcg_out_pool_finalize(s
);
1122 tcg_debug_assert(result
== 0);
1127 #ifndef CONFIG_TCG_INTERPRETER
1128 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0
), (uintptr_t)buf0
,
1129 tcg_ptr_byte_diff(buf1
, buf0
));
1132 /* Deduct the prologue from the buffer. */
1133 prologue_size
= tcg_current_code_size(s
);
1134 s
->code_gen_ptr
= buf1
;
1135 s
->code_gen_buffer
= buf1
;
1137 total_size
-= prologue_size
;
1138 s
->code_gen_buffer_size
= total_size
;
1140 tcg_register_jit(tcg_splitwx_to_rx(s
->code_gen_buffer
), total_size
);
1143 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
1144 FILE *logfile
= qemu_log_lock();
1145 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size
);
1146 if (s
->data_gen_ptr
) {
1147 size_t code_size
= s
->data_gen_ptr
- buf0
;
1148 size_t data_size
= prologue_size
- code_size
;
1151 log_disas(buf0
, code_size
);
1153 for (i
= 0; i
< data_size
; i
+= sizeof(tcg_target_ulong
)) {
1154 if (sizeof(tcg_target_ulong
) == 8) {
1155 qemu_log("0x%08" PRIxPTR
": .quad 0x%016" PRIx64
"\n",
1156 (uintptr_t)s
->data_gen_ptr
+ i
,
1157 *(uint64_t *)(s
->data_gen_ptr
+ i
));
1159 qemu_log("0x%08" PRIxPTR
": .long 0x%08x\n",
1160 (uintptr_t)s
->data_gen_ptr
+ i
,
1161 *(uint32_t *)(s
->data_gen_ptr
+ i
));
1165 log_disas(buf0
, prologue_size
);
1169 qemu_log_unlock(logfile
);
1173 /* Assert that goto_ptr is implemented completely. */
1174 if (TCG_TARGET_HAS_goto_ptr
) {
1175 tcg_debug_assert(tcg_code_gen_epilogue
!= NULL
);
1179 void tcg_func_start(TCGContext
*s
)
1182 s
->nb_temps
= s
->nb_globals
;
1184 /* No temps have been previously allocated for size or locality. */
1185 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
1189 s
->current_frame_offset
= s
->frame_start
;
1191 #ifdef CONFIG_DEBUG_TCG
1192 s
->goto_tb_issue_mask
= 0;
1195 QTAILQ_INIT(&s
->ops
);
1196 QTAILQ_INIT(&s
->free_ops
);
1197 QSIMPLEQ_INIT(&s
->labels
);
1200 static inline TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
1202 int n
= s
->nb_temps
++;
1203 tcg_debug_assert(n
< TCG_MAX_TEMPS
);
1204 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
1207 static inline TCGTemp
*tcg_global_alloc(TCGContext
*s
)
1211 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
1213 ts
= tcg_temp_alloc(s
);
1214 ts
->temp_global
= 1;
1219 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
1220 TCGReg reg
, const char *name
)
1224 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
1228 ts
= tcg_global_alloc(s
);
1229 ts
->base_type
= type
;
1234 tcg_regset_set_reg(s
->reserved_regs
, reg
);
1239 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
1241 s
->frame_start
= start
;
1242 s
->frame_end
= start
+ size
;
1244 = tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
1247 TCGTemp
*tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
1248 intptr_t offset
, const char *name
)
1250 TCGContext
*s
= tcg_ctx
;
1251 TCGTemp
*base_ts
= tcgv_ptr_temp(base
);
1252 TCGTemp
*ts
= tcg_global_alloc(s
);
1253 int indirect_reg
= 0, bigendian
= 0;
1254 #ifdef HOST_WORDS_BIGENDIAN
1258 if (!base_ts
->fixed_reg
) {
1259 /* We do not support double-indirect registers. */
1260 tcg_debug_assert(!base_ts
->indirect_reg
);
1261 base_ts
->indirect_base
= 1;
1262 s
->nb_indirects
+= (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
1267 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1268 TCGTemp
*ts2
= tcg_global_alloc(s
);
1271 ts
->base_type
= TCG_TYPE_I64
;
1272 ts
->type
= TCG_TYPE_I32
;
1273 ts
->indirect_reg
= indirect_reg
;
1274 ts
->mem_allocated
= 1;
1275 ts
->mem_base
= base_ts
;
1276 ts
->mem_offset
= offset
+ bigendian
* 4;
1277 pstrcpy(buf
, sizeof(buf
), name
);
1278 pstrcat(buf
, sizeof(buf
), "_0");
1279 ts
->name
= strdup(buf
);
1281 tcg_debug_assert(ts2
== ts
+ 1);
1282 ts2
->base_type
= TCG_TYPE_I64
;
1283 ts2
->type
= TCG_TYPE_I32
;
1284 ts2
->indirect_reg
= indirect_reg
;
1285 ts2
->mem_allocated
= 1;
1286 ts2
->mem_base
= base_ts
;
1287 ts2
->mem_offset
= offset
+ (1 - bigendian
) * 4;
1288 pstrcpy(buf
, sizeof(buf
), name
);
1289 pstrcat(buf
, sizeof(buf
), "_1");
1290 ts2
->name
= strdup(buf
);
1292 ts
->base_type
= type
;
1294 ts
->indirect_reg
= indirect_reg
;
1295 ts
->mem_allocated
= 1;
1296 ts
->mem_base
= base_ts
;
1297 ts
->mem_offset
= offset
;
1303 TCGTemp
*tcg_temp_new_internal(TCGType type
, bool temp_local
)
1305 TCGContext
*s
= tcg_ctx
;
1309 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
1310 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
1311 if (idx
< TCG_MAX_TEMPS
) {
1312 /* There is already an available temp with the right type. */
1313 clear_bit(idx
, s
->free_temps
[k
].l
);
1315 ts
= &s
->temps
[idx
];
1316 ts
->temp_allocated
= 1;
1317 tcg_debug_assert(ts
->base_type
== type
);
1318 tcg_debug_assert(ts
->temp_local
== temp_local
);
1320 ts
= tcg_temp_alloc(s
);
1321 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1322 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1324 ts
->base_type
= type
;
1325 ts
->type
= TCG_TYPE_I32
;
1326 ts
->temp_allocated
= 1;
1327 ts
->temp_local
= temp_local
;
1329 tcg_debug_assert(ts2
== ts
+ 1);
1330 ts2
->base_type
= TCG_TYPE_I64
;
1331 ts2
->type
= TCG_TYPE_I32
;
1332 ts2
->temp_allocated
= 1;
1333 ts2
->temp_local
= temp_local
;
1335 ts
->base_type
= type
;
1337 ts
->temp_allocated
= 1;
1338 ts
->temp_local
= temp_local
;
1342 #if defined(CONFIG_DEBUG_TCG)
1348 TCGv_vec
tcg_temp_new_vec(TCGType type
)
1352 #ifdef CONFIG_DEBUG_TCG
1355 assert(TCG_TARGET_HAS_v64
);
1358 assert(TCG_TARGET_HAS_v128
);
1361 assert(TCG_TARGET_HAS_v256
);
1364 g_assert_not_reached();
1368 t
= tcg_temp_new_internal(type
, 0);
1369 return temp_tcgv_vec(t
);
1372 /* Create a new temp of the same type as an existing temp. */
1373 TCGv_vec
tcg_temp_new_vec_matching(TCGv_vec match
)
1375 TCGTemp
*t
= tcgv_vec_temp(match
);
1377 tcg_debug_assert(t
->temp_allocated
!= 0);
1379 t
= tcg_temp_new_internal(t
->base_type
, 0);
1380 return temp_tcgv_vec(t
);
1383 void tcg_temp_free_internal(TCGTemp
*ts
)
1385 TCGContext
*s
= tcg_ctx
;
1388 #if defined(CONFIG_DEBUG_TCG)
1390 if (s
->temps_in_use
< 0) {
1391 fprintf(stderr
, "More temporaries freed than allocated!\n");
1395 tcg_debug_assert(ts
->temp_global
== 0);
1396 tcg_debug_assert(ts
->temp_allocated
!= 0);
1397 ts
->temp_allocated
= 0;
1400 k
= ts
->base_type
+ (ts
->temp_local
? TCG_TYPE_COUNT
: 0);
1401 set_bit(idx
, s
->free_temps
[k
].l
);
1404 TCGv_i32
tcg_const_i32(int32_t val
)
1407 t0
= tcg_temp_new_i32();
1408 tcg_gen_movi_i32(t0
, val
);
1412 TCGv_i64
tcg_const_i64(int64_t val
)
1415 t0
= tcg_temp_new_i64();
1416 tcg_gen_movi_i64(t0
, val
);
1420 TCGv_i32
tcg_const_local_i32(int32_t val
)
1423 t0
= tcg_temp_local_new_i32();
1424 tcg_gen_movi_i32(t0
, val
);
1428 TCGv_i64
tcg_const_local_i64(int64_t val
)
1431 t0
= tcg_temp_local_new_i64();
1432 tcg_gen_movi_i64(t0
, val
);
1436 #if defined(CONFIG_DEBUG_TCG)
1437 void tcg_clear_temp_count(void)
1439 TCGContext
*s
= tcg_ctx
;
1440 s
->temps_in_use
= 0;
1443 int tcg_check_temp_count(void)
1445 TCGContext
*s
= tcg_ctx
;
1446 if (s
->temps_in_use
) {
1447 /* Clear the count so that we don't give another
1448 * warning immediately next time around.
1450 s
->temps_in_use
= 0;
1457 /* Return true if OP may appear in the opcode stream.
1458 Test the runtime variable that controls each opcode. */
1459 bool tcg_op_supported(TCGOpcode op
)
1462 = TCG_TARGET_HAS_v64
| TCG_TARGET_HAS_v128
| TCG_TARGET_HAS_v256
;
1465 case INDEX_op_discard
:
1466 case INDEX_op_set_label
:
1470 case INDEX_op_insn_start
:
1471 case INDEX_op_exit_tb
:
1472 case INDEX_op_goto_tb
:
1473 case INDEX_op_qemu_ld_i32
:
1474 case INDEX_op_qemu_st_i32
:
1475 case INDEX_op_qemu_ld_i64
:
1476 case INDEX_op_qemu_st_i64
:
1479 case INDEX_op_qemu_st8_i32
:
1480 return TCG_TARGET_HAS_qemu_st8_i32
;
1482 case INDEX_op_goto_ptr
:
1483 return TCG_TARGET_HAS_goto_ptr
;
1485 case INDEX_op_mov_i32
:
1486 case INDEX_op_movi_i32
:
1487 case INDEX_op_setcond_i32
:
1488 case INDEX_op_brcond_i32
:
1489 case INDEX_op_ld8u_i32
:
1490 case INDEX_op_ld8s_i32
:
1491 case INDEX_op_ld16u_i32
:
1492 case INDEX_op_ld16s_i32
:
1493 case INDEX_op_ld_i32
:
1494 case INDEX_op_st8_i32
:
1495 case INDEX_op_st16_i32
:
1496 case INDEX_op_st_i32
:
1497 case INDEX_op_add_i32
:
1498 case INDEX_op_sub_i32
:
1499 case INDEX_op_mul_i32
:
1500 case INDEX_op_and_i32
:
1501 case INDEX_op_or_i32
:
1502 case INDEX_op_xor_i32
:
1503 case INDEX_op_shl_i32
:
1504 case INDEX_op_shr_i32
:
1505 case INDEX_op_sar_i32
:
1508 case INDEX_op_movcond_i32
:
1509 return TCG_TARGET_HAS_movcond_i32
;
1510 case INDEX_op_div_i32
:
1511 case INDEX_op_divu_i32
:
1512 return TCG_TARGET_HAS_div_i32
;
1513 case INDEX_op_rem_i32
:
1514 case INDEX_op_remu_i32
:
1515 return TCG_TARGET_HAS_rem_i32
;
1516 case INDEX_op_div2_i32
:
1517 case INDEX_op_divu2_i32
:
1518 return TCG_TARGET_HAS_div2_i32
;
1519 case INDEX_op_rotl_i32
:
1520 case INDEX_op_rotr_i32
:
1521 return TCG_TARGET_HAS_rot_i32
;
1522 case INDEX_op_deposit_i32
:
1523 return TCG_TARGET_HAS_deposit_i32
;
1524 case INDEX_op_extract_i32
:
1525 return TCG_TARGET_HAS_extract_i32
;
1526 case INDEX_op_sextract_i32
:
1527 return TCG_TARGET_HAS_sextract_i32
;
1528 case INDEX_op_extract2_i32
:
1529 return TCG_TARGET_HAS_extract2_i32
;
1530 case INDEX_op_add2_i32
:
1531 return TCG_TARGET_HAS_add2_i32
;
1532 case INDEX_op_sub2_i32
:
1533 return TCG_TARGET_HAS_sub2_i32
;
1534 case INDEX_op_mulu2_i32
:
1535 return TCG_TARGET_HAS_mulu2_i32
;
1536 case INDEX_op_muls2_i32
:
1537 return TCG_TARGET_HAS_muls2_i32
;
1538 case INDEX_op_muluh_i32
:
1539 return TCG_TARGET_HAS_muluh_i32
;
1540 case INDEX_op_mulsh_i32
:
1541 return TCG_TARGET_HAS_mulsh_i32
;
1542 case INDEX_op_ext8s_i32
:
1543 return TCG_TARGET_HAS_ext8s_i32
;
1544 case INDEX_op_ext16s_i32
:
1545 return TCG_TARGET_HAS_ext16s_i32
;
1546 case INDEX_op_ext8u_i32
:
1547 return TCG_TARGET_HAS_ext8u_i32
;
1548 case INDEX_op_ext16u_i32
:
1549 return TCG_TARGET_HAS_ext16u_i32
;
1550 case INDEX_op_bswap16_i32
:
1551 return TCG_TARGET_HAS_bswap16_i32
;
1552 case INDEX_op_bswap32_i32
:
1553 return TCG_TARGET_HAS_bswap32_i32
;
1554 case INDEX_op_not_i32
:
1555 return TCG_TARGET_HAS_not_i32
;
1556 case INDEX_op_neg_i32
:
1557 return TCG_TARGET_HAS_neg_i32
;
1558 case INDEX_op_andc_i32
:
1559 return TCG_TARGET_HAS_andc_i32
;
1560 case INDEX_op_orc_i32
:
1561 return TCG_TARGET_HAS_orc_i32
;
1562 case INDEX_op_eqv_i32
:
1563 return TCG_TARGET_HAS_eqv_i32
;
1564 case INDEX_op_nand_i32
:
1565 return TCG_TARGET_HAS_nand_i32
;
1566 case INDEX_op_nor_i32
:
1567 return TCG_TARGET_HAS_nor_i32
;
1568 case INDEX_op_clz_i32
:
1569 return TCG_TARGET_HAS_clz_i32
;
1570 case INDEX_op_ctz_i32
:
1571 return TCG_TARGET_HAS_ctz_i32
;
1572 case INDEX_op_ctpop_i32
:
1573 return TCG_TARGET_HAS_ctpop_i32
;
1575 case INDEX_op_brcond2_i32
:
1576 case INDEX_op_setcond2_i32
:
1577 return TCG_TARGET_REG_BITS
== 32;
1579 case INDEX_op_mov_i64
:
1580 case INDEX_op_movi_i64
:
1581 case INDEX_op_setcond_i64
:
1582 case INDEX_op_brcond_i64
:
1583 case INDEX_op_ld8u_i64
:
1584 case INDEX_op_ld8s_i64
:
1585 case INDEX_op_ld16u_i64
:
1586 case INDEX_op_ld16s_i64
:
1587 case INDEX_op_ld32u_i64
:
1588 case INDEX_op_ld32s_i64
:
1589 case INDEX_op_ld_i64
:
1590 case INDEX_op_st8_i64
:
1591 case INDEX_op_st16_i64
:
1592 case INDEX_op_st32_i64
:
1593 case INDEX_op_st_i64
:
1594 case INDEX_op_add_i64
:
1595 case INDEX_op_sub_i64
:
1596 case INDEX_op_mul_i64
:
1597 case INDEX_op_and_i64
:
1598 case INDEX_op_or_i64
:
1599 case INDEX_op_xor_i64
:
1600 case INDEX_op_shl_i64
:
1601 case INDEX_op_shr_i64
:
1602 case INDEX_op_sar_i64
:
1603 case INDEX_op_ext_i32_i64
:
1604 case INDEX_op_extu_i32_i64
:
1605 return TCG_TARGET_REG_BITS
== 64;
1607 case INDEX_op_movcond_i64
:
1608 return TCG_TARGET_HAS_movcond_i64
;
1609 case INDEX_op_div_i64
:
1610 case INDEX_op_divu_i64
:
1611 return TCG_TARGET_HAS_div_i64
;
1612 case INDEX_op_rem_i64
:
1613 case INDEX_op_remu_i64
:
1614 return TCG_TARGET_HAS_rem_i64
;
1615 case INDEX_op_div2_i64
:
1616 case INDEX_op_divu2_i64
:
1617 return TCG_TARGET_HAS_div2_i64
;
1618 case INDEX_op_rotl_i64
:
1619 case INDEX_op_rotr_i64
:
1620 return TCG_TARGET_HAS_rot_i64
;
1621 case INDEX_op_deposit_i64
:
1622 return TCG_TARGET_HAS_deposit_i64
;
1623 case INDEX_op_extract_i64
:
1624 return TCG_TARGET_HAS_extract_i64
;
1625 case INDEX_op_sextract_i64
:
1626 return TCG_TARGET_HAS_sextract_i64
;
1627 case INDEX_op_extract2_i64
:
1628 return TCG_TARGET_HAS_extract2_i64
;
1629 case INDEX_op_extrl_i64_i32
:
1630 return TCG_TARGET_HAS_extrl_i64_i32
;
1631 case INDEX_op_extrh_i64_i32
:
1632 return TCG_TARGET_HAS_extrh_i64_i32
;
1633 case INDEX_op_ext8s_i64
:
1634 return TCG_TARGET_HAS_ext8s_i64
;
1635 case INDEX_op_ext16s_i64
:
1636 return TCG_TARGET_HAS_ext16s_i64
;
1637 case INDEX_op_ext32s_i64
:
1638 return TCG_TARGET_HAS_ext32s_i64
;
1639 case INDEX_op_ext8u_i64
:
1640 return TCG_TARGET_HAS_ext8u_i64
;
1641 case INDEX_op_ext16u_i64
:
1642 return TCG_TARGET_HAS_ext16u_i64
;
1643 case INDEX_op_ext32u_i64
:
1644 return TCG_TARGET_HAS_ext32u_i64
;
1645 case INDEX_op_bswap16_i64
:
1646 return TCG_TARGET_HAS_bswap16_i64
;
1647 case INDEX_op_bswap32_i64
:
1648 return TCG_TARGET_HAS_bswap32_i64
;
1649 case INDEX_op_bswap64_i64
:
1650 return TCG_TARGET_HAS_bswap64_i64
;
1651 case INDEX_op_not_i64
:
1652 return TCG_TARGET_HAS_not_i64
;
1653 case INDEX_op_neg_i64
:
1654 return TCG_TARGET_HAS_neg_i64
;
1655 case INDEX_op_andc_i64
:
1656 return TCG_TARGET_HAS_andc_i64
;
1657 case INDEX_op_orc_i64
:
1658 return TCG_TARGET_HAS_orc_i64
;
1659 case INDEX_op_eqv_i64
:
1660 return TCG_TARGET_HAS_eqv_i64
;
1661 case INDEX_op_nand_i64
:
1662 return TCG_TARGET_HAS_nand_i64
;
1663 case INDEX_op_nor_i64
:
1664 return TCG_TARGET_HAS_nor_i64
;
1665 case INDEX_op_clz_i64
:
1666 return TCG_TARGET_HAS_clz_i64
;
1667 case INDEX_op_ctz_i64
:
1668 return TCG_TARGET_HAS_ctz_i64
;
1669 case INDEX_op_ctpop_i64
:
1670 return TCG_TARGET_HAS_ctpop_i64
;
1671 case INDEX_op_add2_i64
:
1672 return TCG_TARGET_HAS_add2_i64
;
1673 case INDEX_op_sub2_i64
:
1674 return TCG_TARGET_HAS_sub2_i64
;
1675 case INDEX_op_mulu2_i64
:
1676 return TCG_TARGET_HAS_mulu2_i64
;
1677 case INDEX_op_muls2_i64
:
1678 return TCG_TARGET_HAS_muls2_i64
;
1679 case INDEX_op_muluh_i64
:
1680 return TCG_TARGET_HAS_muluh_i64
;
1681 case INDEX_op_mulsh_i64
:
1682 return TCG_TARGET_HAS_mulsh_i64
;
1684 case INDEX_op_mov_vec
:
1685 case INDEX_op_dup_vec
:
1686 case INDEX_op_dupi_vec
:
1687 case INDEX_op_dupm_vec
:
1688 case INDEX_op_ld_vec
:
1689 case INDEX_op_st_vec
:
1690 case INDEX_op_add_vec
:
1691 case INDEX_op_sub_vec
:
1692 case INDEX_op_and_vec
:
1693 case INDEX_op_or_vec
:
1694 case INDEX_op_xor_vec
:
1695 case INDEX_op_cmp_vec
:
1697 case INDEX_op_dup2_vec
:
1698 return have_vec
&& TCG_TARGET_REG_BITS
== 32;
1699 case INDEX_op_not_vec
:
1700 return have_vec
&& TCG_TARGET_HAS_not_vec
;
1701 case INDEX_op_neg_vec
:
1702 return have_vec
&& TCG_TARGET_HAS_neg_vec
;
1703 case INDEX_op_abs_vec
:
1704 return have_vec
&& TCG_TARGET_HAS_abs_vec
;
1705 case INDEX_op_andc_vec
:
1706 return have_vec
&& TCG_TARGET_HAS_andc_vec
;
1707 case INDEX_op_orc_vec
:
1708 return have_vec
&& TCG_TARGET_HAS_orc_vec
;
1709 case INDEX_op_mul_vec
:
1710 return have_vec
&& TCG_TARGET_HAS_mul_vec
;
1711 case INDEX_op_shli_vec
:
1712 case INDEX_op_shri_vec
:
1713 case INDEX_op_sari_vec
:
1714 return have_vec
&& TCG_TARGET_HAS_shi_vec
;
1715 case INDEX_op_shls_vec
:
1716 case INDEX_op_shrs_vec
:
1717 case INDEX_op_sars_vec
:
1718 return have_vec
&& TCG_TARGET_HAS_shs_vec
;
1719 case INDEX_op_shlv_vec
:
1720 case INDEX_op_shrv_vec
:
1721 case INDEX_op_sarv_vec
:
1722 return have_vec
&& TCG_TARGET_HAS_shv_vec
;
1723 case INDEX_op_rotli_vec
:
1724 return have_vec
&& TCG_TARGET_HAS_roti_vec
;
1725 case INDEX_op_rotls_vec
:
1726 return have_vec
&& TCG_TARGET_HAS_rots_vec
;
1727 case INDEX_op_rotlv_vec
:
1728 case INDEX_op_rotrv_vec
:
1729 return have_vec
&& TCG_TARGET_HAS_rotv_vec
;
1730 case INDEX_op_ssadd_vec
:
1731 case INDEX_op_usadd_vec
:
1732 case INDEX_op_sssub_vec
:
1733 case INDEX_op_ussub_vec
:
1734 return have_vec
&& TCG_TARGET_HAS_sat_vec
;
1735 case INDEX_op_smin_vec
:
1736 case INDEX_op_umin_vec
:
1737 case INDEX_op_smax_vec
:
1738 case INDEX_op_umax_vec
:
1739 return have_vec
&& TCG_TARGET_HAS_minmax_vec
;
1740 case INDEX_op_bitsel_vec
:
1741 return have_vec
&& TCG_TARGET_HAS_bitsel_vec
;
1742 case INDEX_op_cmpsel_vec
:
1743 return have_vec
&& TCG_TARGET_HAS_cmpsel_vec
;
1746 tcg_debug_assert(op
> INDEX_op_last_generic
&& op
< NB_OPS
);
1751 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1752 and endian swap. Maybe it would be better to do the alignment
1753 and endian swap in tcg_reg_alloc_call(). */
1754 void tcg_gen_callN(void *func
, TCGTemp
*ret
, int nargs
, TCGTemp
**args
)
1756 int i
, real_args
, nb_rets
, pi
;
1757 unsigned sizemask
, flags
;
1758 TCGHelperInfo
*info
;
1761 info
= g_hash_table_lookup(helper_table
, (gpointer
)func
);
1762 flags
= info
->flags
;
1763 sizemask
= info
->sizemask
;
1765 #ifdef CONFIG_PLUGIN
1766 /* detect non-plugin helpers */
1767 if (tcg_ctx
->plugin_insn
&& unlikely(strncmp(info
->name
, "plugin_", 7))) {
1768 tcg_ctx
->plugin_insn
->calls_helpers
= true;
1772 #if defined(__sparc__) && !defined(__arch64__) \
1773 && !defined(CONFIG_TCG_INTERPRETER)
1774 /* We have 64-bit values in one register, but need to pass as two
1775 separate parameters. Split them. */
1776 int orig_sizemask
= sizemask
;
1777 int orig_nargs
= nargs
;
1778 TCGv_i64 retl
, reth
;
1779 TCGTemp
*split_args
[MAX_OPC_PARAM
];
1783 if (sizemask
!= 0) {
1784 for (i
= real_args
= 0; i
< nargs
; ++i
) {
1785 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1787 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1788 TCGv_i32 h
= tcg_temp_new_i32();
1789 TCGv_i32 l
= tcg_temp_new_i32();
1790 tcg_gen_extr_i64_i32(l
, h
, orig
);
1791 split_args
[real_args
++] = tcgv_i32_temp(h
);
1792 split_args
[real_args
++] = tcgv_i32_temp(l
);
1794 split_args
[real_args
++] = args
[i
];
1801 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1802 for (i
= 0; i
< nargs
; ++i
) {
1803 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1804 int is_signed
= sizemask
& (2 << (i
+1)*2);
1806 TCGv_i64 temp
= tcg_temp_new_i64();
1807 TCGv_i64 orig
= temp_tcgv_i64(args
[i
]);
1809 tcg_gen_ext32s_i64(temp
, orig
);
1811 tcg_gen_ext32u_i64(temp
, orig
);
1813 args
[i
] = tcgv_i64_temp(temp
);
1816 #endif /* TCG_TARGET_EXTEND_ARGS */
1818 op
= tcg_emit_op(INDEX_op_call
);
1822 #if defined(__sparc__) && !defined(__arch64__) \
1823 && !defined(CONFIG_TCG_INTERPRETER)
1824 if (orig_sizemask
& 1) {
1825 /* The 32-bit ABI is going to return the 64-bit value in
1826 the %o0/%o1 register pair. Prepare for this by using
1827 two return temporaries, and reassemble below. */
1828 retl
= tcg_temp_new_i64();
1829 reth
= tcg_temp_new_i64();
1830 op
->args
[pi
++] = tcgv_i64_arg(reth
);
1831 op
->args
[pi
++] = tcgv_i64_arg(retl
);
1834 op
->args
[pi
++] = temp_arg(ret
);
1838 if (TCG_TARGET_REG_BITS
< 64 && (sizemask
& 1)) {
1839 #ifdef HOST_WORDS_BIGENDIAN
1840 op
->args
[pi
++] = temp_arg(ret
+ 1);
1841 op
->args
[pi
++] = temp_arg(ret
);
1843 op
->args
[pi
++] = temp_arg(ret
);
1844 op
->args
[pi
++] = temp_arg(ret
+ 1);
1848 op
->args
[pi
++] = temp_arg(ret
);
1855 TCGOP_CALLO(op
) = nb_rets
;
1858 for (i
= 0; i
< nargs
; i
++) {
1859 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1860 if (TCG_TARGET_REG_BITS
< 64 && is_64bit
) {
1861 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1862 /* some targets want aligned 64 bit args */
1863 if (real_args
& 1) {
1864 op
->args
[pi
++] = TCG_CALL_DUMMY_ARG
;
1868 /* If stack grows up, then we will be placing successive
1869 arguments at lower addresses, which means we need to
1870 reverse the order compared to how we would normally
1871 treat either big or little-endian. For those arguments
1872 that will wind up in registers, this still works for
1873 HPPA (the only current STACK_GROWSUP target) since the
1874 argument registers are *also* allocated in decreasing
1875 order. If another such target is added, this logic may
1876 have to get more complicated to differentiate between
1877 stack arguments and register arguments. */
1878 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1879 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1880 op
->args
[pi
++] = temp_arg(args
[i
]);
1882 op
->args
[pi
++] = temp_arg(args
[i
]);
1883 op
->args
[pi
++] = temp_arg(args
[i
] + 1);
1889 op
->args
[pi
++] = temp_arg(args
[i
]);
1892 op
->args
[pi
++] = (uintptr_t)func
;
1893 op
->args
[pi
++] = flags
;
1894 TCGOP_CALLI(op
) = real_args
;
1896 /* Make sure the fields didn't overflow. */
1897 tcg_debug_assert(TCGOP_CALLI(op
) == real_args
);
1898 tcg_debug_assert(pi
<= ARRAY_SIZE(op
->args
));
1900 #if defined(__sparc__) && !defined(__arch64__) \
1901 && !defined(CONFIG_TCG_INTERPRETER)
1902 /* Free all of the parts we allocated above. */
1903 for (i
= real_args
= 0; i
< orig_nargs
; ++i
) {
1904 int is_64bit
= orig_sizemask
& (1 << (i
+1)*2);
1906 tcg_temp_free_internal(args
[real_args
++]);
1907 tcg_temp_free_internal(args
[real_args
++]);
1912 if (orig_sizemask
& 1) {
1913 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1914 Note that describing these as TCGv_i64 eliminates an unnecessary
1915 zero-extension that tcg_gen_concat_i32_i64 would create. */
1916 tcg_gen_concat32_i64(temp_tcgv_i64(ret
), retl
, reth
);
1917 tcg_temp_free_i64(retl
);
1918 tcg_temp_free_i64(reth
);
1920 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1921 for (i
= 0; i
< nargs
; ++i
) {
1922 int is_64bit
= sizemask
& (1 << (i
+1)*2);
1924 tcg_temp_free_internal(args
[i
]);
1927 #endif /* TCG_TARGET_EXTEND_ARGS */
1930 static void tcg_reg_alloc_start(TCGContext
*s
)
1935 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
1937 ts
->val_type
= (ts
->fixed_reg
? TEMP_VAL_REG
: TEMP_VAL_MEM
);
1939 for (n
= s
->nb_temps
; i
< n
; i
++) {
1941 ts
->val_type
= (ts
->temp_local
? TEMP_VAL_MEM
: TEMP_VAL_DEAD
);
1942 ts
->mem_allocated
= 0;
1946 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
1949 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
1952 int idx
= temp_idx(ts
);
1954 if (ts
->temp_global
) {
1955 pstrcpy(buf
, buf_size
, ts
->name
);
1956 } else if (ts
->temp_local
) {
1957 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
1959 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
1964 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
1965 int buf_size
, TCGArg arg
)
1967 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
1970 /* Find helper name. */
1971 static inline const char *tcg_find_helper(TCGContext
*s
, uintptr_t val
)
1973 const char *ret
= NULL
;
1975 TCGHelperInfo
*info
= g_hash_table_lookup(helper_table
, (gpointer
)val
);
1983 static const char * const cond_name
[] =
1985 [TCG_COND_NEVER
] = "never",
1986 [TCG_COND_ALWAYS
] = "always",
1987 [TCG_COND_EQ
] = "eq",
1988 [TCG_COND_NE
] = "ne",
1989 [TCG_COND_LT
] = "lt",
1990 [TCG_COND_GE
] = "ge",
1991 [TCG_COND_LE
] = "le",
1992 [TCG_COND_GT
] = "gt",
1993 [TCG_COND_LTU
] = "ltu",
1994 [TCG_COND_GEU
] = "geu",
1995 [TCG_COND_LEU
] = "leu",
1996 [TCG_COND_GTU
] = "gtu"
1999 static const char * const ldst_name
[] =
2015 static const char * const alignment_name
[(MO_AMASK
>> MO_ASHIFT
) + 1] = {
2016 #ifdef TARGET_ALIGNED_ONLY
2017 [MO_UNALN
>> MO_ASHIFT
] = "un+",
2018 [MO_ALIGN
>> MO_ASHIFT
] = "",
2020 [MO_UNALN
>> MO_ASHIFT
] = "",
2021 [MO_ALIGN
>> MO_ASHIFT
] = "al+",
2023 [MO_ALIGN_2
>> MO_ASHIFT
] = "al2+",
2024 [MO_ALIGN_4
>> MO_ASHIFT
] = "al4+",
2025 [MO_ALIGN_8
>> MO_ASHIFT
] = "al8+",
2026 [MO_ALIGN_16
>> MO_ASHIFT
] = "al16+",
2027 [MO_ALIGN_32
>> MO_ASHIFT
] = "al32+",
2028 [MO_ALIGN_64
>> MO_ASHIFT
] = "al64+",
2031 static inline bool tcg_regset_single(TCGRegSet d
)
2033 return (d
& (d
- 1)) == 0;
2036 static inline TCGReg
tcg_regset_first(TCGRegSet d
)
2038 if (TCG_TARGET_NB_REGS
<= 32) {
2045 static void tcg_dump_ops(TCGContext
*s
, bool have_prefs
)
2050 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
2051 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
2052 const TCGOpDef
*def
;
2057 def
= &tcg_op_defs
[c
];
2059 if (c
== INDEX_op_insn_start
) {
2061 col
+= qemu_log("\n ----");
2063 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2065 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2066 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
2070 col
+= qemu_log(" " TARGET_FMT_lx
, a
);
2072 } else if (c
== INDEX_op_call
) {
2073 /* variable number of arguments */
2074 nb_oargs
= TCGOP_CALLO(op
);
2075 nb_iargs
= TCGOP_CALLI(op
);
2076 nb_cargs
= def
->nb_cargs
;
2078 /* function name, flags, out args */
2079 col
+= qemu_log(" %s %s,$0x%" TCG_PRIlx
",$%d", def
->name
,
2080 tcg_find_helper(s
, op
->args
[nb_oargs
+ nb_iargs
]),
2081 op
->args
[nb_oargs
+ nb_iargs
+ 1], nb_oargs
);
2082 for (i
= 0; i
< nb_oargs
; i
++) {
2083 col
+= qemu_log(",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2086 for (i
= 0; i
< nb_iargs
; i
++) {
2087 TCGArg arg
= op
->args
[nb_oargs
+ i
];
2088 const char *t
= "<dummy>";
2089 if (arg
!= TCG_CALL_DUMMY_ARG
) {
2090 t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
2092 col
+= qemu_log(",%s", t
);
2095 col
+= qemu_log(" %s ", def
->name
);
2097 nb_oargs
= def
->nb_oargs
;
2098 nb_iargs
= def
->nb_iargs
;
2099 nb_cargs
= def
->nb_cargs
;
2101 if (def
->flags
& TCG_OPF_VECTOR
) {
2102 col
+= qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op
),
2103 8 << TCGOP_VECE(op
));
2107 for (i
= 0; i
< nb_oargs
; i
++) {
2109 col
+= qemu_log(",");
2111 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2114 for (i
= 0; i
< nb_iargs
; i
++) {
2116 col
+= qemu_log(",");
2118 col
+= qemu_log("%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2122 case INDEX_op_brcond_i32
:
2123 case INDEX_op_setcond_i32
:
2124 case INDEX_op_movcond_i32
:
2125 case INDEX_op_brcond2_i32
:
2126 case INDEX_op_setcond2_i32
:
2127 case INDEX_op_brcond_i64
:
2128 case INDEX_op_setcond_i64
:
2129 case INDEX_op_movcond_i64
:
2130 case INDEX_op_cmp_vec
:
2131 case INDEX_op_cmpsel_vec
:
2132 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
2133 && cond_name
[op
->args
[k
]]) {
2134 col
+= qemu_log(",%s", cond_name
[op
->args
[k
++]]);
2136 col
+= qemu_log(",$0x%" TCG_PRIlx
, op
->args
[k
++]);
2140 case INDEX_op_qemu_ld_i32
:
2141 case INDEX_op_qemu_st_i32
:
2142 case INDEX_op_qemu_st8_i32
:
2143 case INDEX_op_qemu_ld_i64
:
2144 case INDEX_op_qemu_st_i64
:
2146 TCGMemOpIdx oi
= op
->args
[k
++];
2147 MemOp op
= get_memop(oi
);
2148 unsigned ix
= get_mmuidx(oi
);
2150 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
2151 col
+= qemu_log(",$0x%x,%u", op
, ix
);
2153 const char *s_al
, *s_op
;
2154 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
2155 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
2156 col
+= qemu_log(",%s%s,%u", s_al
, s_op
, ix
);
2166 case INDEX_op_set_label
:
2168 case INDEX_op_brcond_i32
:
2169 case INDEX_op_brcond_i64
:
2170 case INDEX_op_brcond2_i32
:
2171 col
+= qemu_log("%s$L%d", k
? "," : "",
2172 arg_label(op
->args
[k
])->id
);
2178 for (; i
< nb_cargs
; i
++, k
++) {
2179 col
+= qemu_log("%s$0x%" TCG_PRIlx
, k
? "," : "", op
->args
[k
]);
2183 if (have_prefs
|| op
->life
) {
2185 QemuLogFile
*logfile
;
2188 logfile
= qatomic_rcu_read(&qemu_logfile
);
2190 for (; col
< 40; ++col
) {
2191 putc(' ', logfile
->fd
);
2198 unsigned life
= op
->life
;
2200 if (life
& (SYNC_ARG
* 3)) {
2202 for (i
= 0; i
< 2; ++i
) {
2203 if (life
& (SYNC_ARG
<< i
)) {
2211 for (i
= 0; life
; ++i
, life
>>= 1) {
2220 for (i
= 0; i
< nb_oargs
; ++i
) {
2221 TCGRegSet set
= op
->output_pref
[i
];
2230 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
2232 #ifdef CONFIG_DEBUG_TCG
2233 } else if (tcg_regset_single(set
)) {
2234 TCGReg reg
= tcg_regset_first(set
);
2235 qemu_log("%s", tcg_target_reg_names
[reg
]);
2237 } else if (TCG_TARGET_NB_REGS
<= 32) {
2238 qemu_log("%#x", (uint32_t)set
);
2240 qemu_log("%#" PRIx64
, (uint64_t)set
);
2249 /* we give more priority to constraints with less registers */
2250 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
2252 const TCGArgConstraint
*arg_ct
= &def
->args_ct
[k
];
2255 if (arg_ct
->oalias
) {
2256 /* an alias is equivalent to a single register */
2259 n
= ctpop64(arg_ct
->regs
);
2261 return TCG_TARGET_NB_REGS
- n
+ 1;
2264 /* sort from highest priority to lowest */
2265 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
2268 TCGArgConstraint
*a
= def
->args_ct
;
2270 for (i
= 0; i
< n
; i
++) {
2271 a
[start
+ i
].sort_index
= start
+ i
;
2276 for (i
= 0; i
< n
- 1; i
++) {
2277 for (j
= i
+ 1; j
< n
; j
++) {
2278 int p1
= get_constraint_priority(def
, a
[start
+ i
].sort_index
);
2279 int p2
= get_constraint_priority(def
, a
[start
+ j
].sort_index
);
2281 int tmp
= a
[start
+ i
].sort_index
;
2282 a
[start
+ i
].sort_index
= a
[start
+ j
].sort_index
;
2283 a
[start
+ j
].sort_index
= tmp
;
2289 static void process_op_defs(TCGContext
*s
)
2293 for (op
= 0; op
< NB_OPS
; op
++) {
2294 TCGOpDef
*def
= &tcg_op_defs
[op
];
2295 const TCGTargetOpDef
*tdefs
;
2299 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2303 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
2308 tdefs
= tcg_target_op_def(op
);
2309 /* Missing TCGTargetOpDef entry. */
2310 tcg_debug_assert(tdefs
!= NULL
);
2312 type
= (def
->flags
& TCG_OPF_64BIT
? TCG_TYPE_I64
: TCG_TYPE_I32
);
2313 for (i
= 0; i
< nb_args
; i
++) {
2314 const char *ct_str
= tdefs
->args_ct_str
[i
];
2315 /* Incomplete TCGTargetOpDef entry. */
2316 tcg_debug_assert(ct_str
!= NULL
);
2318 while (*ct_str
!= '\0') {
2322 int oarg
= *ct_str
- '0';
2323 tcg_debug_assert(ct_str
== tdefs
->args_ct_str
[i
]);
2324 tcg_debug_assert(oarg
< def
->nb_oargs
);
2325 tcg_debug_assert(def
->args_ct
[oarg
].regs
!= 0);
2326 def
->args_ct
[i
] = def
->args_ct
[oarg
];
2327 /* The output sets oalias. */
2328 def
->args_ct
[oarg
].oalias
= true;
2329 def
->args_ct
[oarg
].alias_index
= i
;
2330 /* The input sets ialias. */
2331 def
->args_ct
[i
].ialias
= true;
2332 def
->args_ct
[i
].alias_index
= oarg
;
2337 def
->args_ct
[i
].newreg
= true;
2341 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
2345 ct_str
= target_parse_constraint(&def
->args_ct
[i
],
2347 /* Typo in TCGTargetOpDef constraint. */
2348 tcg_debug_assert(ct_str
!= NULL
);
2353 /* TCGTargetOpDef entry with too much information? */
2354 tcg_debug_assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
2356 /* sort the constraints (XXX: this is just an heuristic) */
2357 sort_constraints(def
, 0, def
->nb_oargs
);
2358 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
2362 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
2368 label
= arg_label(op
->args
[0]);
2371 case INDEX_op_brcond_i32
:
2372 case INDEX_op_brcond_i64
:
2373 label
= arg_label(op
->args
[3]);
2376 case INDEX_op_brcond2_i32
:
2377 label
= arg_label(op
->args
[5]);
2384 QTAILQ_REMOVE(&s
->ops
, op
, link
);
2385 QTAILQ_INSERT_TAIL(&s
->free_ops
, op
, link
);
2388 #ifdef CONFIG_PROFILER
2389 qatomic_set(&s
->prof
.del_op_count
, s
->prof
.del_op_count
+ 1);
2393 static TCGOp
*tcg_op_alloc(TCGOpcode opc
)
2395 TCGContext
*s
= tcg_ctx
;
2398 if (likely(QTAILQ_EMPTY(&s
->free_ops
))) {
2399 op
= tcg_malloc(sizeof(TCGOp
));
2401 op
= QTAILQ_FIRST(&s
->free_ops
);
2402 QTAILQ_REMOVE(&s
->free_ops
, op
, link
);
2404 memset(op
, 0, offsetof(TCGOp
, link
));
2411 TCGOp
*tcg_emit_op(TCGOpcode opc
)
2413 TCGOp
*op
= tcg_op_alloc(opc
);
2414 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
2418 TCGOp
*tcg_op_insert_before(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2420 TCGOp
*new_op
= tcg_op_alloc(opc
);
2421 QTAILQ_INSERT_BEFORE(old_op
, new_op
, link
);
2425 TCGOp
*tcg_op_insert_after(TCGContext
*s
, TCGOp
*old_op
, TCGOpcode opc
)
2427 TCGOp
*new_op
= tcg_op_alloc(opc
);
2428 QTAILQ_INSERT_AFTER(&s
->ops
, old_op
, new_op
, link
);
2432 /* Reachable analysis : remove unreachable code. */
2433 static void reachable_code_pass(TCGContext
*s
)
2435 TCGOp
*op
, *op_next
;
2438 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2444 case INDEX_op_set_label
:
2445 label
= arg_label(op
->args
[0]);
2446 if (label
->refs
== 0) {
2448 * While there is an occasional backward branch, virtually
2449 * all branches generated by the translators are forward.
2450 * Which means that generally we will have already removed
2451 * all references to the label that will be, and there is
2452 * little to be gained by iterating.
2456 /* Once we see a label, insns become live again. */
2461 * Optimization can fold conditional branches to unconditional.
2462 * If we find a label with one reference which is preceded by
2463 * an unconditional branch to it, remove both. This needed to
2464 * wait until the dead code in between them was removed.
2466 if (label
->refs
== 1) {
2467 TCGOp
*op_prev
= QTAILQ_PREV(op
, link
);
2468 if (op_prev
->opc
== INDEX_op_br
&&
2469 label
== arg_label(op_prev
->args
[0])) {
2470 tcg_op_remove(s
, op_prev
);
2478 case INDEX_op_exit_tb
:
2479 case INDEX_op_goto_ptr
:
2480 /* Unconditional branches; everything following is dead. */
2485 /* Notice noreturn helper calls, raising exceptions. */
2486 call_flags
= op
->args
[TCGOP_CALLO(op
) + TCGOP_CALLI(op
) + 1];
2487 if (call_flags
& TCG_CALL_NO_RETURN
) {
2492 case INDEX_op_insn_start
:
2493 /* Never remove -- we need to keep these for unwind. */
2502 tcg_op_remove(s
, op
);
2510 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2511 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2513 /* For liveness_pass_1, the register preferences for a given temp. */
2514 static inline TCGRegSet
*la_temp_pref(TCGTemp
*ts
)
2516 return ts
->state_ptr
;
2519 /* For liveness_pass_1, reset the preferences for a given temp to the
2520 * maximal regset for its type.
2522 static inline void la_reset_pref(TCGTemp
*ts
)
2525 = (ts
->state
== TS_DEAD
? 0 : tcg_target_available_regs
[ts
->type
]);
2528 /* liveness analysis: end of function: all temps are dead, and globals
2529 should be in memory. */
2530 static void la_func_end(TCGContext
*s
, int ng
, int nt
)
2534 for (i
= 0; i
< ng
; ++i
) {
2535 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2536 la_reset_pref(&s
->temps
[i
]);
2538 for (i
= ng
; i
< nt
; ++i
) {
2539 s
->temps
[i
].state
= TS_DEAD
;
2540 la_reset_pref(&s
->temps
[i
]);
2544 /* liveness analysis: end of basic block: all temps are dead, globals
2545 and local temps should be in memory. */
2546 static void la_bb_end(TCGContext
*s
, int ng
, int nt
)
2550 for (i
= 0; i
< ng
; ++i
) {
2551 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2552 la_reset_pref(&s
->temps
[i
]);
2554 for (i
= ng
; i
< nt
; ++i
) {
2555 s
->temps
[i
].state
= (s
->temps
[i
].temp_local
2558 la_reset_pref(&s
->temps
[i
]);
2562 /* liveness analysis: sync globals back to memory. */
2563 static void la_global_sync(TCGContext
*s
, int ng
)
2567 for (i
= 0; i
< ng
; ++i
) {
2568 int state
= s
->temps
[i
].state
;
2569 s
->temps
[i
].state
= state
| TS_MEM
;
2570 if (state
== TS_DEAD
) {
2571 /* If the global was previously dead, reset prefs. */
2572 la_reset_pref(&s
->temps
[i
]);
2578 * liveness analysis: conditional branch: all temps are dead,
2579 * globals and local temps should be synced.
2581 static void la_bb_sync(TCGContext
*s
, int ng
, int nt
)
2583 la_global_sync(s
, ng
);
2585 for (int i
= ng
; i
< nt
; ++i
) {
2586 if (s
->temps
[i
].temp_local
) {
2587 int state
= s
->temps
[i
].state
;
2588 s
->temps
[i
].state
= state
| TS_MEM
;
2589 if (state
!= TS_DEAD
) {
2593 s
->temps
[i
].state
= TS_DEAD
;
2595 la_reset_pref(&s
->temps
[i
]);
2599 /* liveness analysis: sync globals back to memory and kill. */
2600 static void la_global_kill(TCGContext
*s
, int ng
)
2604 for (i
= 0; i
< ng
; i
++) {
2605 s
->temps
[i
].state
= TS_DEAD
| TS_MEM
;
2606 la_reset_pref(&s
->temps
[i
]);
2610 /* liveness analysis: note live globals crossing calls. */
2611 static void la_cross_call(TCGContext
*s
, int nt
)
2613 TCGRegSet mask
= ~tcg_target_call_clobber_regs
;
2616 for (i
= 0; i
< nt
; i
++) {
2617 TCGTemp
*ts
= &s
->temps
[i
];
2618 if (!(ts
->state
& TS_DEAD
)) {
2619 TCGRegSet
*pset
= la_temp_pref(ts
);
2620 TCGRegSet set
= *pset
;
2623 /* If the combination is not possible, restart. */
2625 set
= tcg_target_available_regs
[ts
->type
] & mask
;
2632 /* Liveness analysis : update the opc_arg_life array to tell if a
2633 given input arguments is dead. Instructions updating dead
2634 temporaries are removed. */
2635 static void liveness_pass_1(TCGContext
*s
)
2637 int nb_globals
= s
->nb_globals
;
2638 int nb_temps
= s
->nb_temps
;
2639 TCGOp
*op
, *op_prev
;
2643 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2644 for (i
= 0; i
< nb_temps
; ++i
) {
2645 s
->temps
[i
].state_ptr
= prefs
+ i
;
2648 /* ??? Should be redundant with the exit_tb that ends the TB. */
2649 la_func_end(s
, nb_globals
, nb_temps
);
2651 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2652 int nb_iargs
, nb_oargs
;
2653 TCGOpcode opc_new
, opc_new2
;
2655 TCGLifeData arg_life
= 0;
2657 TCGOpcode opc
= op
->opc
;
2658 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2666 nb_oargs
= TCGOP_CALLO(op
);
2667 nb_iargs
= TCGOP_CALLI(op
);
2668 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2670 /* pure functions can be removed if their result is unused */
2671 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2672 for (i
= 0; i
< nb_oargs
; i
++) {
2673 ts
= arg_temp(op
->args
[i
]);
2674 if (ts
->state
!= TS_DEAD
) {
2675 goto do_not_remove_call
;
2682 /* Output args are dead. */
2683 for (i
= 0; i
< nb_oargs
; i
++) {
2684 ts
= arg_temp(op
->args
[i
]);
2685 if (ts
->state
& TS_DEAD
) {
2686 arg_life
|= DEAD_ARG
<< i
;
2688 if (ts
->state
& TS_MEM
) {
2689 arg_life
|= SYNC_ARG
<< i
;
2691 ts
->state
= TS_DEAD
;
2694 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2695 op
->output_pref
[i
] = 0;
2698 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2699 TCG_CALL_NO_READ_GLOBALS
))) {
2700 la_global_kill(s
, nb_globals
);
2701 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2702 la_global_sync(s
, nb_globals
);
2705 /* Record arguments that die in this helper. */
2706 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2707 ts
= arg_temp(op
->args
[i
]);
2708 if (ts
&& ts
->state
& TS_DEAD
) {
2709 arg_life
|= DEAD_ARG
<< i
;
2713 /* For all live registers, remove call-clobbered prefs. */
2714 la_cross_call(s
, nb_temps
);
2716 nb_call_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
2718 /* Input arguments are live for preceding opcodes. */
2719 for (i
= 0; i
< nb_iargs
; i
++) {
2720 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2721 if (ts
&& ts
->state
& TS_DEAD
) {
2722 /* For those arguments that die, and will be allocated
2723 * in registers, clear the register set for that arg,
2724 * to be filled in below. For args that will be on
2725 * the stack, reset to any available reg.
2728 = (i
< nb_call_regs
? 0 :
2729 tcg_target_available_regs
[ts
->type
]);
2730 ts
->state
&= ~TS_DEAD
;
2734 /* For each input argument, add its input register to prefs.
2735 If a temp is used once, this produces a single set bit. */
2736 for (i
= 0; i
< MIN(nb_call_regs
, nb_iargs
); i
++) {
2737 ts
= arg_temp(op
->args
[i
+ nb_oargs
]);
2739 tcg_regset_set_reg(*la_temp_pref(ts
),
2740 tcg_target_call_iarg_regs
[i
]);
2745 case INDEX_op_insn_start
:
2747 case INDEX_op_discard
:
2748 /* mark the temporary as dead */
2749 ts
= arg_temp(op
->args
[0]);
2750 ts
->state
= TS_DEAD
;
2754 case INDEX_op_add2_i32
:
2755 opc_new
= INDEX_op_add_i32
;
2757 case INDEX_op_sub2_i32
:
2758 opc_new
= INDEX_op_sub_i32
;
2760 case INDEX_op_add2_i64
:
2761 opc_new
= INDEX_op_add_i64
;
2763 case INDEX_op_sub2_i64
:
2764 opc_new
= INDEX_op_sub_i64
;
2768 /* Test if the high part of the operation is dead, but not
2769 the low part. The result can be optimized to a simple
2770 add or sub. This happens often for x86_64 guest when the
2771 cpu mode is set to 32 bit. */
2772 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2773 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2776 /* Replace the opcode and adjust the args in place,
2777 leaving 3 unused args at the end. */
2778 op
->opc
= opc
= opc_new
;
2779 op
->args
[1] = op
->args
[2];
2780 op
->args
[2] = op
->args
[4];
2781 /* Fall through and mark the single-word operation live. */
2787 case INDEX_op_mulu2_i32
:
2788 opc_new
= INDEX_op_mul_i32
;
2789 opc_new2
= INDEX_op_muluh_i32
;
2790 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
2792 case INDEX_op_muls2_i32
:
2793 opc_new
= INDEX_op_mul_i32
;
2794 opc_new2
= INDEX_op_mulsh_i32
;
2795 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
2797 case INDEX_op_mulu2_i64
:
2798 opc_new
= INDEX_op_mul_i64
;
2799 opc_new2
= INDEX_op_muluh_i64
;
2800 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
2802 case INDEX_op_muls2_i64
:
2803 opc_new
= INDEX_op_mul_i64
;
2804 opc_new2
= INDEX_op_mulsh_i64
;
2805 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
2810 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
2811 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
2812 /* Both parts of the operation are dead. */
2815 /* The high part of the operation is dead; generate the low. */
2816 op
->opc
= opc
= opc_new
;
2817 op
->args
[1] = op
->args
[2];
2818 op
->args
[2] = op
->args
[3];
2819 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
2820 /* The low part of the operation is dead; generate the high. */
2821 op
->opc
= opc
= opc_new2
;
2822 op
->args
[0] = op
->args
[1];
2823 op
->args
[1] = op
->args
[2];
2824 op
->args
[2] = op
->args
[3];
2828 /* Mark the single-word operation live. */
2833 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2834 nb_iargs
= def
->nb_iargs
;
2835 nb_oargs
= def
->nb_oargs
;
2837 /* Test if the operation can be removed because all
2838 its outputs are dead. We assume that nb_oargs == 0
2839 implies side effects */
2840 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
2841 for (i
= 0; i
< nb_oargs
; i
++) {
2842 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
2851 tcg_op_remove(s
, op
);
2855 for (i
= 0; i
< nb_oargs
; i
++) {
2856 ts
= arg_temp(op
->args
[i
]);
2858 /* Remember the preference of the uses that followed. */
2859 op
->output_pref
[i
] = *la_temp_pref(ts
);
2861 /* Output args are dead. */
2862 if (ts
->state
& TS_DEAD
) {
2863 arg_life
|= DEAD_ARG
<< i
;
2865 if (ts
->state
& TS_MEM
) {
2866 arg_life
|= SYNC_ARG
<< i
;
2868 ts
->state
= TS_DEAD
;
2872 /* If end of basic block, update. */
2873 if (def
->flags
& TCG_OPF_BB_EXIT
) {
2874 la_func_end(s
, nb_globals
, nb_temps
);
2875 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
2876 la_bb_sync(s
, nb_globals
, nb_temps
);
2877 } else if (def
->flags
& TCG_OPF_BB_END
) {
2878 la_bb_end(s
, nb_globals
, nb_temps
);
2879 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2880 la_global_sync(s
, nb_globals
);
2881 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
2882 la_cross_call(s
, nb_temps
);
2886 /* Record arguments that die in this opcode. */
2887 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2888 ts
= arg_temp(op
->args
[i
]);
2889 if (ts
->state
& TS_DEAD
) {
2890 arg_life
|= DEAD_ARG
<< i
;
2894 /* Input arguments are live for preceding opcodes. */
2895 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2896 ts
= arg_temp(op
->args
[i
]);
2897 if (ts
->state
& TS_DEAD
) {
2898 /* For operands that were dead, initially allow
2899 all regs for the type. */
2900 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
2901 ts
->state
&= ~TS_DEAD
;
2905 /* Incorporate constraints for this operand. */
2907 case INDEX_op_mov_i32
:
2908 case INDEX_op_mov_i64
:
2909 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2910 have proper constraints. That said, special case
2911 moves to propagate preferences backward. */
2912 if (IS_DEAD_ARG(1)) {
2913 *la_temp_pref(arg_temp(op
->args
[0]))
2914 = *la_temp_pref(arg_temp(op
->args
[1]));
2919 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
2920 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
2921 TCGRegSet set
, *pset
;
2923 ts
= arg_temp(op
->args
[i
]);
2924 pset
= la_temp_pref(ts
);
2929 set
&= op
->output_pref
[ct
->alias_index
];
2931 /* If the combination is not possible, restart. */
2941 op
->life
= arg_life
;
2945 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2946 static bool liveness_pass_2(TCGContext
*s
)
2948 int nb_globals
= s
->nb_globals
;
2950 bool changes
= false;
2951 TCGOp
*op
, *op_next
;
2953 /* Create a temporary for each indirect global. */
2954 for (i
= 0; i
< nb_globals
; ++i
) {
2955 TCGTemp
*its
= &s
->temps
[i
];
2956 if (its
->indirect_reg
) {
2957 TCGTemp
*dts
= tcg_temp_alloc(s
);
2958 dts
->type
= its
->type
;
2959 dts
->base_type
= its
->base_type
;
2960 its
->state_ptr
= dts
;
2962 its
->state_ptr
= NULL
;
2964 /* All globals begin dead. */
2965 its
->state
= TS_DEAD
;
2967 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
2968 TCGTemp
*its
= &s
->temps
[i
];
2969 its
->state_ptr
= NULL
;
2970 its
->state
= TS_DEAD
;
2973 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
2974 TCGOpcode opc
= op
->opc
;
2975 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2976 TCGLifeData arg_life
= op
->life
;
2977 int nb_iargs
, nb_oargs
, call_flags
;
2978 TCGTemp
*arg_ts
, *dir_ts
;
2980 if (opc
== INDEX_op_call
) {
2981 nb_oargs
= TCGOP_CALLO(op
);
2982 nb_iargs
= TCGOP_CALLI(op
);
2983 call_flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
2985 nb_iargs
= def
->nb_iargs
;
2986 nb_oargs
= def
->nb_oargs
;
2988 /* Set flags similar to how calls require. */
2989 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
2990 /* Like reading globals: sync_globals */
2991 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
2992 } else if (def
->flags
& TCG_OPF_BB_END
) {
2993 /* Like writing globals: save_globals */
2995 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
2996 /* Like reading globals: sync_globals */
2997 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
2999 /* No effect on globals. */
3000 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
3001 TCG_CALL_NO_WRITE_GLOBALS
);
3005 /* Make sure that input arguments are available. */
3006 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3007 arg_ts
= arg_temp(op
->args
[i
]);
3009 dir_ts
= arg_ts
->state_ptr
;
3010 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
3011 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
3014 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
);
3016 lop
->args
[0] = temp_arg(dir_ts
);
3017 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3018 lop
->args
[2] = arg_ts
->mem_offset
;
3020 /* Loaded, but synced with memory. */
3021 arg_ts
->state
= TS_MEM
;
3026 /* Perform input replacement, and mark inputs that became dead.
3027 No action is required except keeping temp_state up to date
3028 so that we reload when needed. */
3029 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3030 arg_ts
= arg_temp(op
->args
[i
]);
3032 dir_ts
= arg_ts
->state_ptr
;
3034 op
->args
[i
] = temp_arg(dir_ts
);
3036 if (IS_DEAD_ARG(i
)) {
3037 arg_ts
->state
= TS_DEAD
;
3043 /* Liveness analysis should ensure that the following are
3044 all correct, for call sites and basic block end points. */
3045 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
3047 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3048 for (i
= 0; i
< nb_globals
; ++i
) {
3049 /* Liveness should see that globals are synced back,
3050 that is, either TS_DEAD or TS_MEM. */
3051 arg_ts
= &s
->temps
[i
];
3052 tcg_debug_assert(arg_ts
->state_ptr
== 0
3053 || arg_ts
->state
!= 0);
3056 for (i
= 0; i
< nb_globals
; ++i
) {
3057 /* Liveness should see that globals are saved back,
3058 that is, TS_DEAD, waiting to be reloaded. */
3059 arg_ts
= &s
->temps
[i
];
3060 tcg_debug_assert(arg_ts
->state_ptr
== 0
3061 || arg_ts
->state
== TS_DEAD
);
3065 /* Outputs become available. */
3066 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
3067 arg_ts
= arg_temp(op
->args
[0]);
3068 dir_ts
= arg_ts
->state_ptr
;
3070 op
->args
[0] = temp_arg(dir_ts
);
3073 /* The output is now live and modified. */
3076 if (NEED_SYNC_ARG(0)) {
3077 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3080 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3081 TCGTemp
*out_ts
= dir_ts
;
3083 if (IS_DEAD_ARG(0)) {
3084 out_ts
= arg_temp(op
->args
[1]);
3085 arg_ts
->state
= TS_DEAD
;
3086 tcg_op_remove(s
, op
);
3088 arg_ts
->state
= TS_MEM
;
3091 sop
->args
[0] = temp_arg(out_ts
);
3092 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3093 sop
->args
[2] = arg_ts
->mem_offset
;
3095 tcg_debug_assert(!IS_DEAD_ARG(0));
3099 for (i
= 0; i
< nb_oargs
; i
++) {
3100 arg_ts
= arg_temp(op
->args
[i
]);
3101 dir_ts
= arg_ts
->state_ptr
;
3105 op
->args
[i
] = temp_arg(dir_ts
);
3108 /* The output is now live and modified. */
3111 /* Sync outputs upon their last write. */
3112 if (NEED_SYNC_ARG(i
)) {
3113 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3116 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
);
3118 sop
->args
[0] = temp_arg(dir_ts
);
3119 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3120 sop
->args
[2] = arg_ts
->mem_offset
;
3122 arg_ts
->state
= TS_MEM
;
3124 /* Drop outputs that are dead. */
3125 if (IS_DEAD_ARG(i
)) {
3126 arg_ts
->state
= TS_DEAD
;
3135 #ifdef CONFIG_DEBUG_TCG
3136 static void dump_regs(TCGContext
*s
)
3142 for(i
= 0; i
< s
->nb_temps
; i
++) {
3144 printf(" %10s: ", tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3145 switch(ts
->val_type
) {
3147 printf("%s", tcg_target_reg_names
[ts
->reg
]);
3150 printf("%d(%s)", (int)ts
->mem_offset
,
3151 tcg_target_reg_names
[ts
->mem_base
->reg
]);
3153 case TEMP_VAL_CONST
:
3154 printf("$0x%" TCG_PRIlx
, ts
->val
);
3166 for(i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3167 if (s
->reg_to_temp
[i
] != NULL
) {
3169 tcg_target_reg_names
[i
],
3170 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), s
->reg_to_temp
[i
]));
3175 static void check_regs(TCGContext
*s
)
3182 for (reg
= 0; reg
< TCG_TARGET_NB_REGS
; reg
++) {
3183 ts
= s
->reg_to_temp
[reg
];
3185 if (ts
->val_type
!= TEMP_VAL_REG
|| ts
->reg
!= reg
) {
3186 printf("Inconsistency for register %s:\n",
3187 tcg_target_reg_names
[reg
]);
3192 for (k
= 0; k
< s
->nb_temps
; k
++) {
3194 if (ts
->val_type
== TEMP_VAL_REG
&& !ts
->fixed_reg
3195 && s
->reg_to_temp
[ts
->reg
] != ts
) {
3196 printf("Inconsistency for temp %s:\n",
3197 tcg_get_arg_str_ptr(s
, buf
, sizeof(buf
), ts
));
3199 printf("reg state:\n");
3207 static void temp_allocate_frame(TCGContext
*s
, TCGTemp
*ts
)
3209 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3210 /* Sparc64 stack is accessed with offset of 2047 */
3211 s
->current_frame_offset
= (s
->current_frame_offset
+
3212 (tcg_target_long
)sizeof(tcg_target_long
) - 1) &
3213 ~(sizeof(tcg_target_long
) - 1);
3215 if (s
->current_frame_offset
+ (tcg_target_long
)sizeof(tcg_target_long
) >
3219 ts
->mem_offset
= s
->current_frame_offset
;
3220 ts
->mem_base
= s
->frame_temp
;
3221 ts
->mem_allocated
= 1;
3222 s
->current_frame_offset
+= sizeof(tcg_target_long
);
3225 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
, TCGRegSet
);
3227 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3228 mark it free; otherwise mark it dead. */
3229 static void temp_free_or_dead(TCGContext
*s
, TCGTemp
*ts
, int free_or_dead
)
3231 if (ts
->fixed_reg
) {
3234 if (ts
->val_type
== TEMP_VAL_REG
) {
3235 s
->reg_to_temp
[ts
->reg
] = NULL
;
3237 ts
->val_type
= (free_or_dead
< 0
3240 ? TEMP_VAL_MEM
: TEMP_VAL_DEAD
);
3243 /* Mark a temporary as dead. */
3244 static inline void temp_dead(TCGContext
*s
, TCGTemp
*ts
)
3246 temp_free_or_dead(s
, ts
, 1);
3249 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3250 registers needs to be allocated to store a constant. If 'free_or_dead'
3251 is non-zero, subsequently release the temporary; if it is positive, the
3252 temp is dead; if it is negative, the temp is free. */
3253 static void temp_sync(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
,
3254 TCGRegSet preferred_regs
, int free_or_dead
)
3256 if (ts
->fixed_reg
) {
3259 if (!ts
->mem_coherent
) {
3260 if (!ts
->mem_allocated
) {
3261 temp_allocate_frame(s
, ts
);
3263 switch (ts
->val_type
) {
3264 case TEMP_VAL_CONST
:
3265 /* If we're going to free the temp immediately, then we won't
3266 require it later in a register, so attempt to store the
3267 constant to memory directly. */
3269 && tcg_out_sti(s
, ts
->type
, ts
->val
,
3270 ts
->mem_base
->reg
, ts
->mem_offset
)) {
3273 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3274 allocated_regs
, preferred_regs
);
3278 tcg_out_st(s
, ts
->type
, ts
->reg
,
3279 ts
->mem_base
->reg
, ts
->mem_offset
);
3289 ts
->mem_coherent
= 1;
3292 temp_free_or_dead(s
, ts
, free_or_dead
);
3296 /* free register 'reg' by spilling the corresponding temporary if necessary */
3297 static void tcg_reg_free(TCGContext
*s
, TCGReg reg
, TCGRegSet allocated_regs
)
3299 TCGTemp
*ts
= s
->reg_to_temp
[reg
];
3301 temp_sync(s
, ts
, allocated_regs
, 0, -1);
3307 * @required_regs: Set of registers in which we must allocate.
3308 * @allocated_regs: Set of registers which must be avoided.
3309 * @preferred_regs: Set of registers we should prefer.
3310 * @rev: True if we search the registers in "indirect" order.
3312 * The allocated register must be in @required_regs & ~@allocated_regs,
3313 * but if we can put it in @preferred_regs we may save a move later.
3315 static TCGReg
tcg_reg_alloc(TCGContext
*s
, TCGRegSet required_regs
,
3316 TCGRegSet allocated_regs
,
3317 TCGRegSet preferred_regs
, bool rev
)
3319 int i
, j
, f
, n
= ARRAY_SIZE(tcg_target_reg_alloc_order
);
3320 TCGRegSet reg_ct
[2];
3323 reg_ct
[1] = required_regs
& ~allocated_regs
;
3324 tcg_debug_assert(reg_ct
[1] != 0);
3325 reg_ct
[0] = reg_ct
[1] & preferred_regs
;
3327 /* Skip the preferred_regs option if it cannot be satisfied,
3328 or if the preference made no difference. */
3329 f
= reg_ct
[0] == 0 || reg_ct
[0] == reg_ct
[1];
3331 order
= rev
? indirect_reg_alloc_order
: tcg_target_reg_alloc_order
;
3333 /* Try free registers, preferences first. */
3334 for (j
= f
; j
< 2; j
++) {
3335 TCGRegSet set
= reg_ct
[j
];
3337 if (tcg_regset_single(set
)) {
3338 /* One register in the set. */
3339 TCGReg reg
= tcg_regset_first(set
);
3340 if (s
->reg_to_temp
[reg
] == NULL
) {
3344 for (i
= 0; i
< n
; i
++) {
3345 TCGReg reg
= order
[i
];
3346 if (s
->reg_to_temp
[reg
] == NULL
&&
3347 tcg_regset_test_reg(set
, reg
)) {
3354 /* We must spill something. */
3355 for (j
= f
; j
< 2; j
++) {
3356 TCGRegSet set
= reg_ct
[j
];
3358 if (tcg_regset_single(set
)) {
3359 /* One register in the set. */
3360 TCGReg reg
= tcg_regset_first(set
);
3361 tcg_reg_free(s
, reg
, allocated_regs
);
3364 for (i
= 0; i
< n
; i
++) {
3365 TCGReg reg
= order
[i
];
3366 if (tcg_regset_test_reg(set
, reg
)) {
3367 tcg_reg_free(s
, reg
, allocated_regs
);
3377 /* Make sure the temporary is in a register. If needed, allocate the register
3378 from DESIRED while avoiding ALLOCATED. */
3379 static void temp_load(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet desired_regs
,
3380 TCGRegSet allocated_regs
, TCGRegSet preferred_regs
)
3384 switch (ts
->val_type
) {
3387 case TEMP_VAL_CONST
:
3388 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3389 preferred_regs
, ts
->indirect_base
);
3390 if (ts
->type
<= TCG_TYPE_I64
) {
3391 tcg_out_movi(s
, ts
->type
, reg
, ts
->val
);
3393 uint64_t val
= ts
->val
;
3397 * Find the minimal vector element that matches the constant.
3398 * The targets will, in general, have to do this search anyway,
3399 * do this generically.
3401 if (TCG_TARGET_REG_BITS
== 32) {
3402 val
= dup_const(MO_32
, val
);
3405 if (val
== dup_const(MO_8
, val
)) {
3407 } else if (val
== dup_const(MO_16
, val
)) {
3409 } else if (TCG_TARGET_REG_BITS
== 64 &&
3410 val
== dup_const(MO_32
, val
)) {
3414 tcg_out_dupi_vec(s
, ts
->type
, vece
, reg
, ts
->val
);
3416 ts
->mem_coherent
= 0;
3419 reg
= tcg_reg_alloc(s
, desired_regs
, allocated_regs
,
3420 preferred_regs
, ts
->indirect_base
);
3421 tcg_out_ld(s
, ts
->type
, reg
, ts
->mem_base
->reg
, ts
->mem_offset
);
3422 ts
->mem_coherent
= 1;
3429 ts
->val_type
= TEMP_VAL_REG
;
3430 s
->reg_to_temp
[reg
] = ts
;
3433 /* Save a temporary to memory. 'allocated_regs' is used in case a
3434 temporary registers needs to be allocated to store a constant. */
3435 static void temp_save(TCGContext
*s
, TCGTemp
*ts
, TCGRegSet allocated_regs
)
3437 /* The liveness analysis already ensures that globals are back
3438 in memory. Keep an tcg_debug_assert for safety. */
3439 tcg_debug_assert(ts
->val_type
== TEMP_VAL_MEM
|| ts
->fixed_reg
);
3442 /* save globals to their canonical location and assume they can be
3443 modified be the following code. 'allocated_regs' is used in case a
3444 temporary registers needs to be allocated to store a constant. */
3445 static void save_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3449 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3450 temp_save(s
, &s
->temps
[i
], allocated_regs
);
3454 /* sync globals to their canonical location and assume they can be
3455 read by the following code. 'allocated_regs' is used in case a
3456 temporary registers needs to be allocated to store a constant. */
3457 static void sync_globals(TCGContext
*s
, TCGRegSet allocated_regs
)
3461 for (i
= 0, n
= s
->nb_globals
; i
< n
; i
++) {
3462 TCGTemp
*ts
= &s
->temps
[i
];
3463 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
3465 || ts
->mem_coherent
);
3469 /* at the end of a basic block, we assume all temporaries are dead and
3470 all globals are stored at their canonical location. */
3471 static void tcg_reg_alloc_bb_end(TCGContext
*s
, TCGRegSet allocated_regs
)
3475 for (i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3476 TCGTemp
*ts
= &s
->temps
[i
];
3477 if (ts
->temp_local
) {
3478 temp_save(s
, ts
, allocated_regs
);
3480 /* The liveness analysis already ensures that temps are dead.
3481 Keep an tcg_debug_assert for safety. */
3482 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3486 save_globals(s
, allocated_regs
);
3490 * At a conditional branch, we assume all temporaries are dead and
3491 * all globals and local temps are synced to their location.
3493 static void tcg_reg_alloc_cbranch(TCGContext
*s
, TCGRegSet allocated_regs
)
3495 sync_globals(s
, allocated_regs
);
3497 for (int i
= s
->nb_globals
; i
< s
->nb_temps
; i
++) {
3498 TCGTemp
*ts
= &s
->temps
[i
];
3500 * The liveness analysis already ensures that temps are dead.
3501 * Keep tcg_debug_asserts for safety.
3503 if (ts
->temp_local
) {
3504 tcg_debug_assert(ts
->val_type
!= TEMP_VAL_REG
|| ts
->mem_coherent
);
3506 tcg_debug_assert(ts
->val_type
== TEMP_VAL_DEAD
);
3512 * Specialized code generation for INDEX_op_movi_*.
3514 static void tcg_reg_alloc_do_movi(TCGContext
*s
, TCGTemp
*ots
,
3515 tcg_target_ulong val
, TCGLifeData arg_life
,
3516 TCGRegSet preferred_regs
)
3518 /* ENV should not be modified. */
3519 tcg_debug_assert(!ots
->fixed_reg
);
3521 /* The movi is not explicitly generated here. */
3522 if (ots
->val_type
== TEMP_VAL_REG
) {
3523 s
->reg_to_temp
[ots
->reg
] = NULL
;
3525 ots
->val_type
= TEMP_VAL_CONST
;
3527 ots
->mem_coherent
= 0;
3528 if (NEED_SYNC_ARG(0)) {
3529 temp_sync(s
, ots
, s
->reserved_regs
, preferred_regs
, IS_DEAD_ARG(0));
3530 } else if (IS_DEAD_ARG(0)) {
3535 static void tcg_reg_alloc_movi(TCGContext
*s
, const TCGOp
*op
)
3537 TCGTemp
*ots
= arg_temp(op
->args
[0]);
3538 tcg_target_ulong val
= op
->args
[1];
3540 tcg_reg_alloc_do_movi(s
, ots
, val
, op
->life
, op
->output_pref
[0]);
3544 * Specialized code generation for INDEX_op_mov_*.
3546 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3548 const TCGLifeData arg_life
= op
->life
;
3549 TCGRegSet allocated_regs
, preferred_regs
;
3551 TCGType otype
, itype
;
3553 allocated_regs
= s
->reserved_regs
;
3554 preferred_regs
= op
->output_pref
[0];
3555 ots
= arg_temp(op
->args
[0]);
3556 ts
= arg_temp(op
->args
[1]);
3558 /* ENV should not be modified. */
3559 tcg_debug_assert(!ots
->fixed_reg
);
3561 /* Note that otype != itype for no-op truncation. */
3565 if (ts
->val_type
== TEMP_VAL_CONST
) {
3566 /* propagate constant or generate sti */
3567 tcg_target_ulong val
= ts
->val
;
3568 if (IS_DEAD_ARG(1)) {
3571 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3575 /* If the source value is in memory we're going to be forced
3576 to have it in a register in order to perform the copy. Copy
3577 the SOURCE value into its own register first, that way we
3578 don't have to reload SOURCE the next time it is used. */
3579 if (ts
->val_type
== TEMP_VAL_MEM
) {
3580 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3581 allocated_regs
, preferred_regs
);
3584 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3585 if (IS_DEAD_ARG(0)) {
3586 /* mov to a non-saved dead register makes no sense (even with
3587 liveness analysis disabled). */
3588 tcg_debug_assert(NEED_SYNC_ARG(0));
3589 if (!ots
->mem_allocated
) {
3590 temp_allocate_frame(s
, ots
);
3592 tcg_out_st(s
, otype
, ts
->reg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3593 if (IS_DEAD_ARG(1)) {
3598 if (IS_DEAD_ARG(1) && !ts
->fixed_reg
) {
3599 /* the mov can be suppressed */
3600 if (ots
->val_type
== TEMP_VAL_REG
) {
3601 s
->reg_to_temp
[ots
->reg
] = NULL
;
3606 if (ots
->val_type
!= TEMP_VAL_REG
) {
3607 /* When allocating a new register, make sure to not spill the
3609 tcg_regset_set_reg(allocated_regs
, ts
->reg
);
3610 ots
->reg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3611 allocated_regs
, preferred_regs
,
3612 ots
->indirect_base
);
3614 if (!tcg_out_mov(s
, otype
, ots
->reg
, ts
->reg
)) {
3616 * Cross register class move not supported.
3617 * Store the source register into the destination slot
3618 * and leave the destination temp as TEMP_VAL_MEM.
3620 assert(!ots
->fixed_reg
);
3621 if (!ts
->mem_allocated
) {
3622 temp_allocate_frame(s
, ots
);
3624 tcg_out_st(s
, ts
->type
, ts
->reg
,
3625 ots
->mem_base
->reg
, ots
->mem_offset
);
3626 ots
->mem_coherent
= 1;
3627 temp_free_or_dead(s
, ots
, -1);
3631 ots
->val_type
= TEMP_VAL_REG
;
3632 ots
->mem_coherent
= 0;
3633 s
->reg_to_temp
[ots
->reg
] = ots
;
3634 if (NEED_SYNC_ARG(0)) {
3635 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3641 * Specialized code generation for INDEX_op_dup_vec.
3643 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3645 const TCGLifeData arg_life
= op
->life
;
3646 TCGRegSet dup_out_regs
, dup_in_regs
;
3648 TCGType itype
, vtype
;
3649 intptr_t endian_fixup
;
3653 ots
= arg_temp(op
->args
[0]);
3654 its
= arg_temp(op
->args
[1]);
3656 /* ENV should not be modified. */
3657 tcg_debug_assert(!ots
->fixed_reg
);
3660 vece
= TCGOP_VECE(op
);
3661 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3663 if (its
->val_type
== TEMP_VAL_CONST
) {
3664 /* Propagate constant via movi -> dupi. */
3665 tcg_target_ulong val
= its
->val
;
3666 if (IS_DEAD_ARG(1)) {
3669 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, op
->output_pref
[0]);
3673 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3674 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3676 /* Allocate the output register now. */
3677 if (ots
->val_type
!= TEMP_VAL_REG
) {
3678 TCGRegSet allocated_regs
= s
->reserved_regs
;
3680 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
3681 /* Make sure to not spill the input register. */
3682 tcg_regset_set_reg(allocated_regs
, its
->reg
);
3684 ots
->reg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
3685 op
->output_pref
[0], ots
->indirect_base
);
3686 ots
->val_type
= TEMP_VAL_REG
;
3687 ots
->mem_coherent
= 0;
3688 s
->reg_to_temp
[ots
->reg
] = ots
;
3691 switch (its
->val_type
) {
3694 * The dup constriaints must be broad, covering all possible VECE.
3695 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3696 * to fail, indicating that extra moves are required for that case.
3698 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
3699 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
3702 /* Try again from memory or a vector input register. */
3704 if (!its
->mem_coherent
) {
3706 * The input register is not synced, and so an extra store
3707 * would be required to use memory. Attempt an integer-vector
3708 * register move first. We do not have a TCGRegSet for this.
3710 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
3713 /* Sync the temp back to its slot and load from there. */
3714 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
3719 #ifdef HOST_WORDS_BIGENDIAN
3720 endian_fixup
= itype
== TCG_TYPE_I32
? 4 : 8;
3721 endian_fixup
-= 1 << vece
;
3725 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
3726 its
->mem_offset
+ endian_fixup
)) {
3729 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
3733 g_assert_not_reached();
3736 /* We now have a vector input register, so dup must succeed. */
3737 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
3738 tcg_debug_assert(ok
);
3741 if (IS_DEAD_ARG(1)) {
3744 if (NEED_SYNC_ARG(0)) {
3745 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
3747 if (IS_DEAD_ARG(0)) {
3752 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
3754 const TCGLifeData arg_life
= op
->life
;
3755 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
3756 TCGRegSet i_allocated_regs
;
3757 TCGRegSet o_allocated_regs
;
3758 int i
, k
, nb_iargs
, nb_oargs
;
3761 const TCGArgConstraint
*arg_ct
;
3763 TCGArg new_args
[TCG_MAX_OP_ARGS
];
3764 int const_args
[TCG_MAX_OP_ARGS
];
3766 nb_oargs
= def
->nb_oargs
;
3767 nb_iargs
= def
->nb_iargs
;
3769 /* copy constants */
3770 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
3771 op
->args
+ nb_oargs
+ nb_iargs
,
3772 sizeof(TCGArg
) * def
->nb_cargs
);
3774 i_allocated_regs
= s
->reserved_regs
;
3775 o_allocated_regs
= s
->reserved_regs
;
3777 /* satisfy input constraints */
3778 for (k
= 0; k
< nb_iargs
; k
++) {
3779 TCGRegSet i_preferred_regs
, o_preferred_regs
;
3781 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
3783 arg_ct
= &def
->args_ct
[i
];
3786 if (ts
->val_type
== TEMP_VAL_CONST
3787 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
)) {
3788 /* constant is OK for instruction */
3790 new_args
[i
] = ts
->val
;
3794 i_preferred_regs
= o_preferred_regs
= 0;
3795 if (arg_ct
->ialias
) {
3796 o_preferred_regs
= op
->output_pref
[arg_ct
->alias_index
];
3797 if (ts
->fixed_reg
) {
3798 /* if fixed register, we must allocate a new register
3799 if the alias is not the same register */
3800 if (arg
!= op
->args
[arg_ct
->alias_index
]) {
3801 goto allocate_in_reg
;
3804 /* if the input is aliased to an output and if it is
3805 not dead after the instruction, we must allocate
3806 a new register and move it */
3807 if (!IS_DEAD_ARG(i
)) {
3808 goto allocate_in_reg
;
3811 /* check if the current register has already been allocated
3812 for another input aliased to an output */
3813 if (ts
->val_type
== TEMP_VAL_REG
) {
3816 for (k2
= 0 ; k2
< k
; k2
++) {
3817 i2
= def
->args_ct
[nb_oargs
+ k2
].sort_index
;
3818 if (def
->args_ct
[i2
].ialias
&& reg
== new_args
[i2
]) {
3819 goto allocate_in_reg
;
3823 i_preferred_regs
= o_preferred_regs
;
3827 temp_load(s
, ts
, arg_ct
->regs
, i_allocated_regs
, i_preferred_regs
);
3830 if (tcg_regset_test_reg(arg_ct
->regs
, reg
)) {
3831 /* nothing to do : the constraint is satisfied */
3834 /* allocate a new register matching the constraint
3835 and move the temporary register into it */
3836 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3837 i_allocated_regs
, 0);
3838 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, i_allocated_regs
,
3839 o_preferred_regs
, ts
->indirect_base
);
3840 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
3842 * Cross register class move not supported. Sync the
3843 * temp back to its slot and load from there.
3845 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
3846 tcg_out_ld(s
, ts
->type
, reg
,
3847 ts
->mem_base
->reg
, ts
->mem_offset
);
3852 tcg_regset_set_reg(i_allocated_regs
, reg
);
3855 /* mark dead temporaries and free the associated registers */
3856 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3857 if (IS_DEAD_ARG(i
)) {
3858 temp_dead(s
, arg_temp(op
->args
[i
]));
3862 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3863 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
3864 } else if (def
->flags
& TCG_OPF_BB_END
) {
3865 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
3867 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3868 /* XXX: permit generic clobber register list ? */
3869 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
3870 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
3871 tcg_reg_free(s
, i
, i_allocated_regs
);
3875 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3876 /* sync globals if the op has side effects and might trigger
3878 sync_globals(s
, i_allocated_regs
);
3881 /* satisfy the output constraints */
3882 for(k
= 0; k
< nb_oargs
; k
++) {
3883 i
= def
->args_ct
[k
].sort_index
;
3885 arg_ct
= &def
->args_ct
[i
];
3888 /* ENV should not be modified. */
3889 tcg_debug_assert(!ts
->fixed_reg
);
3891 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
3892 reg
= new_args
[arg_ct
->alias_index
];
3893 } else if (arg_ct
->newreg
) {
3894 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
3895 i_allocated_regs
| o_allocated_regs
,
3896 op
->output_pref
[k
], ts
->indirect_base
);
3898 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
3899 op
->output_pref
[k
], ts
->indirect_base
);
3901 tcg_regset_set_reg(o_allocated_regs
, reg
);
3902 if (ts
->val_type
== TEMP_VAL_REG
) {
3903 s
->reg_to_temp
[ts
->reg
] = NULL
;
3905 ts
->val_type
= TEMP_VAL_REG
;
3908 * Temp value is modified, so the value kept in memory is
3909 * potentially not the same.
3911 ts
->mem_coherent
= 0;
3912 s
->reg_to_temp
[reg
] = ts
;
3917 /* emit instruction */
3918 if (def
->flags
& TCG_OPF_VECTOR
) {
3919 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
3920 new_args
, const_args
);
3922 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
3925 /* move the outputs in the correct register if needed */
3926 for(i
= 0; i
< nb_oargs
; i
++) {
3927 ts
= arg_temp(op
->args
[i
]);
3929 /* ENV should not be modified. */
3930 tcg_debug_assert(!ts
->fixed_reg
);
3932 if (NEED_SYNC_ARG(i
)) {
3933 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
3934 } else if (IS_DEAD_ARG(i
)) {
/* Direction-adjust for stack-passed call-argument offsets: negated on
   hosts whose stack grows upward.  NOTE(review): the #else and #endif
   lines of this conditional appear to be missing from this extraction —
   restore against the pristine file before compiling. */
3940 #ifdef TCG_TARGET_STACK_GROWSUP
3941 #define STACK_DIR(x) (-(x))
3943 #define STACK_DIR(x) (x)
/*
 * tcg_reg_alloc_call: register allocation and code emission for a TCG
 * helper-call op — stack arguments are stored first, then register
 * arguments loaded, call-clobbered registers freed, globals saved or
 * synced per the call flags, the call emitted, and outputs assigned.
 *
 * NOTE(review): recovered from a mangled extraction.  Each physical
 * line below is a fragment of an original statement (prefixed with its
 * original line number), and several lines (local declarations such as
 * ts/reg/arg/allocate_args, else branches, closing braces) appear to be
 * missing.  Only comments have been added; restore the code against the
 * pristine file before compiling.
 */
3946 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
3948 const int nb_oargs
= TCGOP_CALLO(op
);
3949 const int nb_iargs
= TCGOP_CALLI(op
);
3950 const TCGLifeData arg_life
= op
->life
;
3951 int flags
, nb_regs
, i
;
3955 intptr_t stack_offset
;
3956 size_t call_stack_size
;
3957 tcg_insn_unit
*func_addr
;
3959 TCGRegSet allocated_regs
;
/* The call target and flags are stored after the in/out arguments. */
3961 func_addr
= (tcg_insn_unit
*)(intptr_t)op
->args
[nb_oargs
+ nb_iargs
];
3962 flags
= op
->args
[nb_oargs
+ nb_iargs
+ 1];
3964 nb_regs
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
3965 if (nb_regs
> nb_iargs
) {
3969 /* assign stack slots first */
3970 call_stack_size
= (nb_iargs
- nb_regs
) * sizeof(tcg_target_long
);
3971 call_stack_size
= (call_stack_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
3972 ~(TCG_TARGET_STACK_ALIGN
- 1);
3973 allocate_args
= (call_stack_size
> TCG_STATIC_CALL_ARGS_SIZE
);
3974 if (allocate_args
) {
3975 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3976 preallocate call stack */
3980 stack_offset
= TCG_TARGET_CALL_STACK_OFFSET
;
3981 for (i
= nb_regs
; i
< nb_iargs
; i
++) {
3982 arg
= op
->args
[nb_oargs
+ i
];
3983 #ifdef TCG_TARGET_STACK_GROWSUP
3984 stack_offset
-= sizeof(tcg_target_long
);
3986 if (arg
!= TCG_CALL_DUMMY_ARG
) {
3988 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
3989 s
->reserved_regs
, 0);
3990 tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
, stack_offset
);
3992 #ifndef TCG_TARGET_STACK_GROWSUP
3993 stack_offset
+= sizeof(tcg_target_long
);
3997 /* assign input registers */
3998 allocated_regs
= s
->reserved_regs
;
3999 for (i
= 0; i
< nb_regs
; i
++) {
4000 arg
= op
->args
[nb_oargs
+ i
];
4001 if (arg
!= TCG_CALL_DUMMY_ARG
) {
4003 reg
= tcg_target_call_iarg_regs
[i
];
4005 if (ts
->val_type
== TEMP_VAL_REG
) {
4006 if (ts
->reg
!= reg
) {
4007 tcg_reg_free(s
, reg
, allocated_regs
);
4008 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4010 * Cross register class move not supported. Sync the
4011 * temp back to its slot and load from there.
4013 temp_sync(s
, ts
, allocated_regs
, 0, 0);
4014 tcg_out_ld(s
, ts
->type
, reg
,
4015 ts
->mem_base
->reg
, ts
->mem_offset
);
4019 TCGRegSet arg_set
= 0;
4021 tcg_reg_free(s
, reg
, allocated_regs
);
4022 tcg_regset_set_reg(arg_set
, reg
);
4023 temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
4026 tcg_regset_set_reg(allocated_regs
, reg
);
4030 /* mark dead temporaries and free the associated registers */
4031 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
4032 if (IS_DEAD_ARG(i
)) {
4033 temp_dead(s
, arg_temp(op
->args
[i
]));
4037 /* clobber call registers */
4038 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4039 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4040 tcg_reg_free(s
, i
, allocated_regs
);
4044 /* Save globals if they might be written by the helper, sync them if
4045 they might be read. */
4046 if (flags
& TCG_CALL_NO_READ_GLOBALS
) {
4048 } else if (flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
4049 sync_globals(s
, allocated_regs
);
4051 save_globals(s
, allocated_regs
);
4054 tcg_out_call(s
, func_addr
);
4056 /* assign output registers and emit moves if needed */
4057 for(i
= 0; i
< nb_oargs
; i
++) {
4061 /* ENV should not be modified. */
4062 tcg_debug_assert(!ts
->fixed_reg
);
4064 reg
= tcg_target_call_oarg_regs
[i
];
4065 tcg_debug_assert(s
->reg_to_temp
[reg
] == NULL
);
4066 if (ts
->val_type
== TEMP_VAL_REG
) {
4067 s
->reg_to_temp
[ts
->reg
] = NULL
;
4069 ts
->val_type
= TEMP_VAL_REG
;
4071 ts
->mem_coherent
= 0;
4072 s
->reg_to_temp
[reg
] = ts
;
4073 if (NEED_SYNC_ARG(i
)) {
4074 temp_sync(s
, ts
, allocated_regs
, 0, IS_DEAD_ARG(i
));
4075 } else if (IS_DEAD_ARG(i
)) {
/* Profiler counter-merging helpers: PROF_ADD accumulates a field from
   @from into @to; PROF_MAX keeps the maximum seen.  Both read the
   source atomically.  NOTE(review): recovered from a mangled
   extraction — the do/while(0) wrappers and #endif appear to be
   missing; no comments may be inserted inside the '\'-continued macro
   bodies below. */
4081 #ifdef CONFIG_PROFILER
4083 /* avoid copy/paste errors */
4084 #define PROF_ADD(to, from, field) \
4086 (to)->field += qatomic_read(&((from)->field)); \
4089 #define PROF_MAX(to, from, field) \
4091 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4092 if (val__ > (to)->field) { \
4093 (to)->field = val__; \
/*
 * tcg_profile_snapshot: merge the per-context profiler data of every
 * TCG context into @prof — scalar counters when @counters is true, the
 * per-opcode table when @table is true.
 *
 * NOTE(review): recovered from a mangled extraction; physical lines are
 * fragments of original statements, and some lines (the loop-index
 * declaration, the if (counters)/if (table) guards, closing braces)
 * appear to be missing.  Only comments added; restore before compiling.
 */
4097 /* Pass in a zero'ed @prof */
4099 void tcg_profile_snapshot(TCGProfile
*prof
, bool counters
, bool table
)
4101 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4104 for (i
= 0; i
< n_ctxs
; i
++) {
4105 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4106 const TCGProfile
*orig
= &s
->prof
;
/* Scalar counters (sums, plus maxima for op/temp counts). */
4109 PROF_ADD(prof
, orig
, cpu_exec_time
);
4110 PROF_ADD(prof
, orig
, tb_count1
);
4111 PROF_ADD(prof
, orig
, tb_count
);
4112 PROF_ADD(prof
, orig
, op_count
);
4113 PROF_MAX(prof
, orig
, op_count_max
);
4114 PROF_ADD(prof
, orig
, temp_count
);
4115 PROF_MAX(prof
, orig
, temp_count_max
);
4116 PROF_ADD(prof
, orig
, del_op_count
);
4117 PROF_ADD(prof
, orig
, code_in_len
);
4118 PROF_ADD(prof
, orig
, code_out_len
);
4119 PROF_ADD(prof
, orig
, search_out_len
);
4120 PROF_ADD(prof
, orig
, interm_time
);
4121 PROF_ADD(prof
, orig
, code_time
);
4122 PROF_ADD(prof
, orig
, la_time
);
4123 PROF_ADD(prof
, orig
, opt_time
);
4124 PROF_ADD(prof
, orig
, restore_count
);
4125 PROF_ADD(prof
, orig
, restore_time
);
/* Per-opcode execution counts. */
4130 for (i
= 0; i
< NB_OPS
; i
++) {
4131 PROF_ADD(prof
, orig
, table_op_count
[i
]);
4140 static void tcg_profile_snapshot_counters(TCGProfile
*prof
)
4142 tcg_profile_snapshot(prof
, true, false);
4145 static void tcg_profile_snapshot_table(TCGProfile
*prof
)
4147 tcg_profile_snapshot(prof
, false, true);
4150 void tcg_dump_op_count(void)
4152 TCGProfile prof
= {};
4155 tcg_profile_snapshot_table(&prof
);
4156 for (i
= 0; i
< NB_OPS
; i
++) {
4157 qemu_printf("%s %" PRId64
"\n", tcg_op_defs
[i
].name
,
4158 prof
.table_op_count
[i
]);
4162 int64_t tcg_cpu_exec_time(void)
4164 unsigned int n_ctxs
= qatomic_read(&n_tcg_ctxs
);
4168 for (i
= 0; i
< n_ctxs
; i
++) {
4169 const TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4170 const TCGProfile
*prof
= &s
->prof
;
4172 ret
+= qatomic_read(&prof
->cpu_exec_time
);
/*
 * tcg_dump_op_count (CONFIG_PROFILER disabled): report that per-opcode
 * statistics are unavailable in this build.
 *
 * NOTE(review): braces reconstructed; the recovered text had only the
 * signature and the qemu_printf call.
 */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
/* Profiler-disabled stub.  NOTE(review): the tail of this function
   (whatever follows error_report — a non-void function must not fall
   off the end) is missing from this extraction; restore against the
   pristine file before compiling. */
4182 int64_t tcg_cpu_exec_time(void)
4184 error_report("%s: TCG profiler not compiled", __func__
);
/*
 * tcg_gen_code: lower the op list in s->ops into host machine code for
 * @tb — optional profiling/logging, optimization and liveness passes,
 * then per-op register allocation and emission, finalization of
 * ldst/pool labels, relocation resolution, and an icache flush.
 * Returns the generated code size on the success path visible below.
 *
 * NOTE(review): recovered from a mangled extraction.  Physical lines
 * are fragments of original statements; many lines (local declarations
 * such as op/num_insns/i/a/n, liveness_pass_1 calls, switch header,
 * break statements, overflow error-return branches, #else/#endif) are
 * missing.  Only comments added; restore before compiling.
 */
4190 int tcg_gen_code(TCGContext
*s
, TranslationBlock
*tb
)
4192 #ifdef CONFIG_PROFILER
4193 TCGProfile
*prof
= &s
->prof
;
/* Count ops and temps for the profiler before any transformation. */
4198 #ifdef CONFIG_PROFILER
4202 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4205 qatomic_set(&prof
->op_count
, prof
->op_count
+ n
);
4206 if (n
> prof
->op_count_max
) {
4207 qatomic_set(&prof
->op_count_max
, n
);
4211 qatomic_set(&prof
->temp_count
, prof
->temp_count
+ n
);
4212 if (n
> prof
->temp_count_max
) {
4213 qatomic_set(&prof
->temp_count_max
, n
);
4219 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
)
4220 && qemu_log_in_addr_range(tb
->pc
))) {
4221 FILE *logfile
= qemu_log_lock();
4223 tcg_dump_ops(s
, false);
4225 qemu_log_unlock(logfile
);
4229 #ifdef CONFIG_DEBUG_TCG
4230 /* Ensure all labels referenced have been emitted. */
4235 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
4236 if (unlikely(!l
->present
) && l
->refs
) {
4237 qemu_log_mask(CPU_LOG_TB_OP
,
4238 "$L%d referenced but not present.\n", l
->id
);
4246 #ifdef CONFIG_PROFILER
4247 qatomic_set(&prof
->opt_time
, prof
->opt_time
- profile_getclock());
4250 #ifdef USE_TCG_OPTIMIZATIONS
4254 #ifdef CONFIG_PROFILER
4255 qatomic_set(&prof
->opt_time
, prof
->opt_time
+ profile_getclock());
4256 qatomic_set(&prof
->la_time
, prof
->la_time
- profile_getclock());
4259 reachable_code_pass(s
);
4262 if (s
->nb_indirects
> 0) {
4264 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND
)
4265 && qemu_log_in_addr_range(tb
->pc
))) {
4266 FILE *logfile
= qemu_log_lock();
4267 qemu_log("OP before indirect lowering:\n");
4268 tcg_dump_ops(s
, false);
4270 qemu_log_unlock(logfile
);
4273 /* Replace indirect temps with direct temps. */
4274 if (liveness_pass_2(s
)) {
4275 /* If changes were made, re-run liveness. */
4280 #ifdef CONFIG_PROFILER
4281 qatomic_set(&prof
->la_time
, prof
->la_time
+ profile_getclock());
4285 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
)
4286 && qemu_log_in_addr_range(tb
->pc
))) {
4287 FILE *logfile
= qemu_log_lock();
4288 qemu_log("OP after optimization and liveness analysis:\n");
4289 tcg_dump_ops(s
, true);
4291 qemu_log_unlock(logfile
);
4295 tcg_reg_alloc_start(s
);
4298 * Reset the buffer pointers when restarting after overflow.
4299 * TODO: Move this into translate-all.c with the rest of the
4300 * buffer management. Having only this done here is confusing.
4302 s
->code_buf
= tcg_splitwx_to_rw(tb
->tc
.ptr
);
4303 s
->code_ptr
= s
->code_buf
;
4305 #ifdef TCG_TARGET_NEED_LDST_LABELS
4306 QSIMPLEQ_INIT(&s
->ldst_labels
);
4308 #ifdef TCG_TARGET_NEED_POOL_LABELS
4309 s
->pool_labels
= NULL
;
/* Main emission loop: dispatch each op to its allocator/emitter. */
4313 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4314 TCGOpcode opc
= op
->opc
;
4316 #ifdef CONFIG_PROFILER
4317 qatomic_set(&prof
->table_op_count
[opc
], prof
->table_op_count
[opc
] + 1);
4321 case INDEX_op_mov_i32
:
4322 case INDEX_op_mov_i64
:
4323 case INDEX_op_mov_vec
:
4324 tcg_reg_alloc_mov(s
, op
);
4326 case INDEX_op_movi_i32
:
4327 case INDEX_op_movi_i64
:
4328 case INDEX_op_dupi_vec
:
4329 tcg_reg_alloc_movi(s
, op
);
4331 case INDEX_op_dup_vec
:
4332 tcg_reg_alloc_dup(s
, op
);
4334 case INDEX_op_insn_start
:
4335 if (num_insns
>= 0) {
4336 size_t off
= tcg_current_code_size(s
);
4337 s
->gen_insn_end_off
[num_insns
] = off
;
4338 /* Assert that we do not overflow our stored offset. */
4339 assert(s
->gen_insn_end_off
[num_insns
] == off
);
4342 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
4344 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4345 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
4349 s
->gen_insn_data
[num_insns
][i
] = a
;
4352 case INDEX_op_discard
:
4353 temp_dead(s
, arg_temp(op
->args
[0]));
4355 case INDEX_op_set_label
:
4356 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
4357 tcg_out_label(s
, arg_label(op
->args
[0]));
4360 tcg_reg_alloc_call(s
, op
);
4363 /* Sanity check that we've not introduced any unhandled opcodes. */
4364 tcg_debug_assert(tcg_op_supported(opc
));
4365 /* Note: in order to speed up the code, it would be much
4366 faster to have specialized register allocator functions for
4367 some common argument patterns */
4368 tcg_reg_alloc_op(s
, op
);
4371 #ifdef CONFIG_DEBUG_TCG
4374 /* Test for (pending) buffer overflow. The assumption is that any
4375 one operation beginning below the high water mark cannot overrun
4376 the buffer completely. Thus we can test for overflow after
4377 generating code without having to check during generation. */
4378 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
4381 /* Test for TB overflow, as seen by gen_insn_end_off. */
4382 if (unlikely(tcg_current_code_size(s
) > UINT16_MAX
)) {
4386 tcg_debug_assert(num_insns
>= 0);
4387 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
4389 /* Generate TB finalization at the end of block */
4390 #ifdef TCG_TARGET_NEED_LDST_LABELS
4391 i
= tcg_out_ldst_finalize(s
);
4396 #ifdef TCG_TARGET_NEED_POOL_LABELS
4397 i
= tcg_out_pool_finalize(s
);
4402 if (!tcg_resolve_relocs(s
)) {
4406 #ifndef CONFIG_TCG_INTERPRETER
4407 /* flush instruction cache */
4408 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
4409 (uintptr_t)s
->code_buf
,
4410 tcg_ptr_byte_diff(s
->code_ptr
, s
->code_buf
));
4413 return tcg_current_code_size(s
);
/*
 * tcg_dump_info (CONFIG_PROFILER build): snapshot the profiler counters
 * and print derived statistics (per-TB averages, cycle ratios, pass
 * timing percentages) via qemu_printf().
 *
 * NOTE(review): recovered from a mangled extraction.  Physical lines
 * are fragments of original statements; several lines (declarations of
 * tb_count/tot, some printf argument lines, closing braces) appear to
 * be missing.  Only comments added; restore before compiling.
 */
4416 #ifdef CONFIG_PROFILER
4417 void tcg_dump_info(void)
4419 TCGProfile prof
= {};
4420 const TCGProfile
*s
;
4422 int64_t tb_div_count
;
4425 tcg_profile_snapshot_counters(&prof
);
/* tb_div_count guards the divisions below against a zero TB count. */
4427 tb_count
= s
->tb_count
;
4428 tb_div_count
= tb_count
? tb_count
: 1;
4429 tot
= s
->interm_time
+ s
->code_time
;
4431 qemu_printf("JIT cycles %" PRId64
" (%0.3f s at 2.4 GHz)\n",
4433 qemu_printf("translated TBs %" PRId64
" (aborted=%" PRId64
4435 tb_count
, s
->tb_count1
- tb_count
,
4436 (double)(s
->tb_count1
- s
->tb_count
)
4437 / (s
->tb_count1
? s
->tb_count1
: 1) * 100.0);
4438 qemu_printf("avg ops/TB %0.1f max=%d\n",
4439 (double)s
->op_count
/ tb_div_count
, s
->op_count_max
);
4440 qemu_printf("deleted ops/TB %0.2f\n",
4441 (double)s
->del_op_count
/ tb_div_count
);
4442 qemu_printf("avg temps/TB %0.2f max=%d\n",
4443 (double)s
->temp_count
/ tb_div_count
, s
->temp_count_max
);
4444 qemu_printf("avg host code/TB %0.1f\n",
4445 (double)s
->code_out_len
/ tb_div_count
);
4446 qemu_printf("avg search data/TB %0.1f\n",
4447 (double)s
->search_out_len
/ tb_div_count
);
4449 qemu_printf("cycles/op %0.1f\n",
4450 s
->op_count
? (double)tot
/ s
->op_count
: 0);
4451 qemu_printf("cycles/in byte %0.1f\n",
4452 s
->code_in_len
? (double)tot
/ s
->code_in_len
: 0);
4453 qemu_printf("cycles/out byte %0.1f\n",
4454 s
->code_out_len
? (double)tot
/ s
->code_out_len
: 0);
4455 qemu_printf("cycles/search byte %0.1f\n",
4456 s
->search_out_len
? (double)tot
/ s
->search_out_len
: 0);
4460 qemu_printf("  gen_interm time   %0.1f%%\n",
4461 (double)s
->interm_time
/ tot
* 100.0);
4462 qemu_printf("  gen_code time     %0.1f%%\n",
4463 (double)s
->code_time
/ tot
* 100.0);
4464 qemu_printf("optim./code time    %0.1f%%\n",
4465 (double)s
->opt_time
/ (s
->code_time
? s
->code_time
: 1)
4467 qemu_printf("liveness/code time  %0.1f%%\n",
4468 (double)s
->la_time
/ (s
->code_time
? s
->code_time
: 1) * 100.0);
4469 qemu_printf("cpu_restore count   %" PRId64
"\n",
4471 qemu_printf("  avg cycles        %0.1f\n",
4472 s
->restore_count
? (double)s
->restore_time
/ s
->restore_count
: 0);
/*
 * tcg_dump_info (CONFIG_PROFILER disabled): report that profiling data
 * is unavailable in this build.
 *
 * NOTE(review): braces reconstructed; the recovered text had only the
 * signature and the qemu_printf call.
 */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
/*
 * GDB JIT registration interface.  The struct layouts and the
 * __jit_debug_* symbol names below are an ABI shared with GDB and must
 * not be altered — GDB looks these symbols up by name and sets a
 * breakpoint on __jit_debug_register_code().
 *
 * NOTE(review): recovered from a mangled extraction; enum/typedef lines
 * (e.g. the JIT action enum) and closing braces appear to be missing.
 * Only comments added; restore before compiling.
 */
4481 #ifdef ELF_HOST_MACHINE
4482 /* In order to use this feature, the backend needs to do three things:
4484 (1) Define ELF_HOST_MACHINE to indicate both what value to
4485 put into the ELF image and to indicate support for the feature.
4487 (2) Define tcg_register_jit. This should create a buffer containing
4488 the contents of a .debug_frame section that describes the post-
4489 prologue unwind info for the tcg machine.
4491 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4494 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4501 struct jit_code_entry
{
4502 struct jit_code_entry
*next_entry
;
4503 struct jit_code_entry
*prev_entry
;
4504 const void *symfile_addr
;
4505 uint64_t symfile_size
;
4508 struct jit_descriptor
{
4510 uint32_t action_flag
;
4511 struct jit_code_entry
*relevant_entry
;
4512 struct jit_code_entry
*first_entry
;
4515 void __jit_debug_register_code(void) __attribute__((noinline
));
4516 void __jit_debug_register_code(void)
4521 /* Must statically initialize the version, because GDB may check
4522 the version before we can set it. */
4523 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
4525 /* End GDB interface. */
/*
 * find_string:
 * @strtab: NUL-separated string table whose entry 0 is the empty string.
 * @str: string to locate; must be present in the table.
 *
 * Return the byte offset of @str within @strtab.  The scan starts past
 * the leading NUL and does not bounds-check: passing a string that is
 * absent from the table loops forever, so callers only look up names
 * they themselves placed in the table.
 *
 * NOTE(review): the scan loop and return were lost in the garbled
 * extraction and have been reconstructed around the surviving lines.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
/*
 * tcg_register_jit_int: build an in-memory fake ELF image (headers,
 * .text as SHT_NOBITS, minimal DWARF .debug_info/.debug_abbrev, the
 * caller-supplied .debug_frame, a one-symbol .symtab and a .strtab)
 * describing the code buffer, then register it with GDB through the
 * __jit_debug_descriptor / __jit_debug_register_code() protocol.
 *
 * NOTE(review): recovered from a mangled extraction.  Physical lines
 * are fragments of original statements; many lines (most DebugInfo
 * fields, the ElfImage ehdr/phdr/shdr/sym/di/da/str member
 * declarations, several section-header entries, the phdr template, the
 * #ifdef DEBUG_JIT write-out guard, closing braces) appear to be
 * missing.  Only comments added; restore before compiling.
 */
4539 static void tcg_register_jit_int(const void *buf_ptr
, size_t buf_size
,
4540 const void *debug_frame
,
4541 size_t debug_frame_size
)
4543 struct __attribute__((packed
)) DebugInfo
{
4550 uintptr_t cu_low_pc
;
4551 uintptr_t cu_high_pc
;
4554 uintptr_t fn_low_pc
;
4555 uintptr_t fn_high_pc
;
4564 struct DebugInfo di
;
4569 struct ElfImage
*img
;
/* Template for the constant parts of the image; the address-dependent
   fields are filled in below after the copy. */
4571 static const struct ElfImage img_template
= {
4573 .e_ident
[EI_MAG0
] = ELFMAG0
,
4574 .e_ident
[EI_MAG1
] = ELFMAG1
,
4575 .e_ident
[EI_MAG2
] = ELFMAG2
,
4576 .e_ident
[EI_MAG3
] = ELFMAG3
,
4577 .e_ident
[EI_CLASS
] = ELF_CLASS
,
4578 .e_ident
[EI_DATA
] = ELF_DATA
,
4579 .e_ident
[EI_VERSION
] = EV_CURRENT
,
4581 .e_machine
= ELF_HOST_MACHINE
,
4582 .e_version
= EV_CURRENT
,
4583 .e_phoff
= offsetof(struct ElfImage
, phdr
),
4584 .e_shoff
= offsetof(struct ElfImage
, shdr
),
4585 .e_ehsize
= sizeof(ElfW(Shdr
)),
4586 .e_phentsize
= sizeof(ElfW(Phdr
)),
4588 .e_shentsize
= sizeof(ElfW(Shdr
)),
4589 .e_shnum
= ARRAY_SIZE(img
->shdr
),
4590 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
4591 #ifdef ELF_HOST_FLAGS
4592 .e_flags
= ELF_HOST_FLAGS
,
4595 .e_ident
[EI_OSABI
] = ELF_OSABI
,
4603 [0] = { .sh_type
= SHT_NULL
},
4604 /* Trick: The contents of code_gen_buffer are not present in
4605 this fake ELF file; that got allocated elsewhere. Therefore
4606 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4607 will not look for contents. We can record any address. */
4609 .sh_type
= SHT_NOBITS
,
4610 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
4612 [2] = { /* .debug_info */
4613 .sh_type
= SHT_PROGBITS
,
4614 .sh_offset
= offsetof(struct ElfImage
, di
),
4615 .sh_size
= sizeof(struct DebugInfo
),
4617 [3] = { /* .debug_abbrev */
4618 .sh_type
= SHT_PROGBITS
,
4619 .sh_offset
= offsetof(struct ElfImage
, da
),
4620 .sh_size
= sizeof(img
->da
),
4622 [4] = { /* .debug_frame */
4623 .sh_type
= SHT_PROGBITS
,
4624 .sh_offset
= sizeof(struct ElfImage
),
4626 [5] = { /* .symtab */
4627 .sh_type
= SHT_SYMTAB
,
4628 .sh_offset
= offsetof(struct ElfImage
, sym
),
4629 .sh_size
= sizeof(img
->sym
),
4631 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
4632 .sh_entsize
= sizeof(ElfW(Sym
)),
4634 [6] = { /* .strtab */
4635 .sh_type
= SHT_STRTAB
,
4636 .sh_offset
= offsetof(struct ElfImage
, str
),
4637 .sh_size
= sizeof(img
->str
),
4641 [1] = { /* code_gen_buffer */
4642 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
4647 .len
= sizeof(struct DebugInfo
) - 4,
4649 .ptr_size
= sizeof(void *),
4651 .cu_lang
= 0x8001,  /* DW_LANG_Mips_Assembler */
4653 .fn_name
= "code_gen_buffer"
4656 1,          /* abbrev number (the cu) */
4657 0x11, 1,    /* DW_TAG_compile_unit, has children */
4658 0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4659 0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4660 0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4661 0, 0,       /* end of abbrev */
4662 2,          /* abbrev number (the fn) */
4663 0x2e, 0,    /* DW_TAG_subprogram, no children */
4664 0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4665 0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4666 0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4667 0, 0,       /* end of abbrev */
4668 0           /* no more abbrev */
4670 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4671 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4674 /* We only need a single jit entry; statically allocate it. */
4675 static struct jit_code_entry one_entry
;
4677 uintptr_t buf
= (uintptr_t)buf_ptr
;
4678 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
4679 DebugFrameHeader
*dfh
;
4681 img
= g_malloc(img_size
);
4682 *img
= img_template
;
/* Patch the address- and size-dependent fields of the copied image. */
4684 img
->phdr
.p_vaddr
= buf
;
4685 img
->phdr
.p_paddr
= buf
;
4686 img
->phdr
.p_memsz
= buf_size
;
4688 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
4689 img
->shdr
[1].sh_addr
= buf
;
4690 img
->shdr
[1].sh_size
= buf_size
;
4692 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
4693 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
4695 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
4696 img
->shdr
[4].sh_size
= debug_frame_size
;
4698 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
4699 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
4701 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
4702 img
->sym
[1].st_value
= buf
;
4703 img
->sym
[1].st_size
= buf_size
;
4705 img
->di
.cu_low_pc
= buf
;
4706 img
->di
.cu_high_pc
= buf
+ buf_size
;
4707 img
->di
.fn_low_pc
= buf
;
4708 img
->di
.fn_high_pc
= buf
+ buf_size
;
/* Append the caller's .debug_frame and point its FDE at the buffer. */
4710 dfh
= (DebugFrameHeader
*)(img
+ 1);
4711 memcpy(dfh
, debug_frame
, debug_frame_size
);
4712 dfh
->fde
.func_start
= buf
;
4713 dfh
->fde
.func_len
= buf_size
;
4716 /* Enable this block to be able to debug the ELF image file creation.
4717 One can use readelf, objdump, or other inspection utilities. */
4719 FILE *f
= fopen("/tmp/qemu.jit", "w+b");
4721 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
4722 /* Avoid stupid unused return value warning for fwrite. */
/* Hand the finished image to GDB via the JIT interface. */
4729 one_entry
.symfile_addr
= img
;
4730 one_entry
.symfile_size
= img_size
;
4732 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
4733 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
4734 __jit_debug_descriptor
.first_entry
= &one_entry
;
4735 __jit_debug_register_code();
/* No support for the feature.  Provide the entry point expected by
   exec.c, and implement the internal function we declared earlier. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* ELF_HOST_MACHINE is not defined: nothing to hand to GDB.
       NOTE(review): empty body reconstructed — the extraction kept only
       the signature. */
}
/* Stub for hosts without ELF_HOST_MACHINE: JIT debug registration is a
   no-op.  NOTE(review): empty body reconstructed — the extraction kept
   only the signature. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
4750 #endif /* ELF_HOST_MACHINE */
4752 #if !TCG_TARGET_MAYBE_vec
4753 void tcg_expand_vec_op(TCGOpcode o
, TCGType t
, unsigned e
, TCGArg a0
, ...)
4755 g_assert_not_reached();