2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to jump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
43 #define NO_CPU_IO_DEFS
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS ELFCLASS32
51 # define ELF_CLASS ELFCLASS64
54 # define ELF_DATA ELFDATA2MSB
56 # define ELF_DATA ELFDATA2LSB
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
65 /* Forward declarations for functions declared in tcg-target.c.inc and
67 static void tcg_target_init(TCGContext
*s
);
68 static void tcg_target_qemu_prologue(TCGContext
*s
);
69 static bool patch_reloc(tcg_insn_unit
*code_ptr
, int type
,
70 intptr_t value
, intptr_t addend
);
72 /* The CIE and FDE header definitions will be common to all hosts. */
74 uint32_t len
__attribute__((aligned((sizeof(void *)))));
80 uint8_t return_column
;
83 typedef struct QEMU_PACKED
{
84 uint32_t len
__attribute__((aligned((sizeof(void *)))));
88 } DebugFrameFDEHeader
;
90 typedef struct QEMU_PACKED
{
92 DebugFrameFDEHeader fde
;
95 static void tcg_register_jit_int(const void *buf
, size_t size
,
96 const void *debug_frame
,
97 size_t debug_frame_size
)
98 __attribute__((unused
));
100 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
101 static void tcg_out_ld(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg1
,
103 static bool tcg_out_mov(TCGContext
*s
, TCGType type
, TCGReg ret
, TCGReg arg
);
104 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
105 TCGReg ret
, tcg_target_long arg
);
106 static void tcg_out_addi_ptr(TCGContext
*s
, TCGReg
, TCGReg
, tcg_target_long
);
107 static void tcg_out_exit_tb(TCGContext
*s
, uintptr_t arg
);
108 static void tcg_out_goto_tb(TCGContext
*s
, int which
);
109 static void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
110 const TCGArg args
[TCG_MAX_OP_ARGS
],
111 const int const_args
[TCG_MAX_OP_ARGS
]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
114 TCGReg dst
, TCGReg src
);
115 static bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
116 TCGReg dst
, TCGReg base
, intptr_t offset
);
117 static void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
118 TCGReg dst
, int64_t arg
);
119 static void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
120 unsigned vecl
, unsigned vece
,
121 const TCGArg args
[TCG_MAX_OP_ARGS
],
122 const int const_args
[TCG_MAX_OP_ARGS
]);
124 static inline bool tcg_out_dup_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
125 TCGReg dst
, TCGReg src
)
127 g_assert_not_reached();
129 static inline bool tcg_out_dupm_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
130 TCGReg dst
, TCGReg base
, intptr_t offset
)
132 g_assert_not_reached();
134 static inline void tcg_out_dupi_vec(TCGContext
*s
, TCGType type
, unsigned vece
,
135 TCGReg dst
, int64_t arg
)
137 g_assert_not_reached();
139 static inline void tcg_out_vec_op(TCGContext
*s
, TCGOpcode opc
,
140 unsigned vecl
, unsigned vece
,
141 const TCGArg args
[TCG_MAX_OP_ARGS
],
142 const int const_args
[TCG_MAX_OP_ARGS
])
144 g_assert_not_reached();
147 static void tcg_out_st(TCGContext
*s
, TCGType type
, TCGReg arg
, TCGReg arg1
,
149 static bool tcg_out_sti(TCGContext
*s
, TCGType type
, TCGArg val
,
150 TCGReg base
, intptr_t ofs
);
151 static void tcg_out_call(TCGContext
*s
, const tcg_insn_unit
*target
,
152 const TCGHelperInfo
*info
);
153 static TCGReg
tcg_target_call_oarg_reg(TCGCallReturnKind kind
, int slot
);
154 static bool tcg_target_const_match(int64_t val
, TCGType type
, int ct
);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext
*s
);
159 TCGContext tcg_init_ctx
;
160 __thread TCGContext
*tcg_ctx
;
162 TCGContext
**tcg_ctxs
;
163 unsigned int tcg_cur_ctxs
;
164 unsigned int tcg_max_ctxs
;
165 TCGv_env cpu_env
= 0;
166 const void *tcg_code_gen_epilogue
;
167 uintptr_t tcg_splitwx_diff
;
169 #ifndef CONFIG_TCG_INTERPRETER
170 tcg_prologue_fn
*tcg_qemu_tb_exec
;
173 static TCGRegSet tcg_target_available_regs
[TCG_TYPE_COUNT
];
174 static TCGRegSet tcg_target_call_clobber_regs
;
176 #if TCG_TARGET_INSN_UNIT_SIZE == 1
177 static __attribute__((unused
)) inline void tcg_out8(TCGContext
*s
, uint8_t v
)
182 static __attribute__((unused
)) inline void tcg_patch8(tcg_insn_unit
*p
,
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
190 static __attribute__((unused
)) inline void tcg_out16(TCGContext
*s
, uint16_t v
)
192 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
195 tcg_insn_unit
*p
= s
->code_ptr
;
196 memcpy(p
, &v
, sizeof(v
));
197 s
->code_ptr
= p
+ (2 / TCG_TARGET_INSN_UNIT_SIZE
);
201 static __attribute__((unused
)) inline void tcg_patch16(tcg_insn_unit
*p
,
204 if (TCG_TARGET_INSN_UNIT_SIZE
== 2) {
207 memcpy(p
, &v
, sizeof(v
));
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
213 static __attribute__((unused
)) inline void tcg_out32(TCGContext
*s
, uint32_t v
)
215 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
218 tcg_insn_unit
*p
= s
->code_ptr
;
219 memcpy(p
, &v
, sizeof(v
));
220 s
->code_ptr
= p
+ (4 / TCG_TARGET_INSN_UNIT_SIZE
);
224 static __attribute__((unused
)) inline void tcg_patch32(tcg_insn_unit
*p
,
227 if (TCG_TARGET_INSN_UNIT_SIZE
== 4) {
230 memcpy(p
, &v
, sizeof(v
));
235 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
236 static __attribute__((unused
)) inline void tcg_out64(TCGContext
*s
, uint64_t v
)
238 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
241 tcg_insn_unit
*p
= s
->code_ptr
;
242 memcpy(p
, &v
, sizeof(v
));
243 s
->code_ptr
= p
+ (8 / TCG_TARGET_INSN_UNIT_SIZE
);
247 static __attribute__((unused
)) inline void tcg_patch64(tcg_insn_unit
*p
,
250 if (TCG_TARGET_INSN_UNIT_SIZE
== 8) {
253 memcpy(p
, &v
, sizeof(v
));
258 /* label relocation processing */
260 static void tcg_out_reloc(TCGContext
*s
, tcg_insn_unit
*code_ptr
, int type
,
261 TCGLabel
*l
, intptr_t addend
)
263 TCGRelocation
*r
= tcg_malloc(sizeof(TCGRelocation
));
268 QSIMPLEQ_INSERT_TAIL(&l
->relocs
, r
, next
);
271 static void tcg_out_label(TCGContext
*s
, TCGLabel
*l
)
273 tcg_debug_assert(!l
->has_value
);
275 l
->u
.value_ptr
= tcg_splitwx_to_rx(s
->code_ptr
);
278 TCGLabel
*gen_new_label(void)
280 TCGContext
*s
= tcg_ctx
;
281 TCGLabel
*l
= tcg_malloc(sizeof(TCGLabel
));
283 memset(l
, 0, sizeof(TCGLabel
));
284 l
->id
= s
->nb_labels
++;
285 QSIMPLEQ_INIT(&l
->relocs
);
287 QSIMPLEQ_INSERT_TAIL(&s
->labels
, l
, next
);
292 static bool tcg_resolve_relocs(TCGContext
*s
)
296 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
298 uintptr_t value
= l
->u
.value
;
300 QSIMPLEQ_FOREACH(r
, &l
->relocs
, next
) {
301 if (!patch_reloc(r
->ptr
, r
->type
, value
, r
->addend
)) {
309 static void set_jmp_reset_offset(TCGContext
*s
, int which
)
312 * We will check for overflow at the end of the opcode loop in
313 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
315 s
->gen_tb
->jmp_reset_offset
[which
] = tcg_current_code_size(s
);
318 static void G_GNUC_UNUSED
set_jmp_insn_offset(TCGContext
*s
, int which
)
321 * We will check for overflow at the end of the opcode loop in
322 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
324 s
->gen_tb
->jmp_insn_offset
[which
] = tcg_current_code_size(s
);
327 static uintptr_t G_GNUC_UNUSED
get_jmp_target_addr(TCGContext
*s
, int which
)
330 * Return the read-execute version of the pointer, for the benefit
331 * of any pc-relative addressing mode.
333 return (uintptr_t)tcg_splitwx_to_rx(&s
->gen_tb
->jmp_target_addr
[which
]);
336 /* Signal overflow, starting over with fewer guest insns. */
338 void tcg_raise_tb_overflow(TCGContext
*s
)
340 siglongjmp(s
->jmp_trans
, -2);
343 #define C_PFX1(P, A) P##A
344 #define C_PFX2(P, A, B) P##A##_##B
345 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
346 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
347 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
348 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
350 /* Define an enumeration for the various combinations. */
352 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
353 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
354 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
355 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
357 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
358 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
359 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
360 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
362 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
364 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
365 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
366 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
367 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
370 #include "tcg-target-con-set.h"
371 } TCGConstraintSetIndex
;
373 static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode
);
389 /* Put all of the constraint sets into an array, indexed by the enum. */
391 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
392 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
393 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
394 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
396 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
397 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
398 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
399 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
401 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
403 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
404 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
405 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
406 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
408 static const TCGTargetOpDef constraint_sets
[] = {
409 #include "tcg-target-con-set.h"
427 /* Expand the enumerator to be returned from tcg_target_op_def(). */
429 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
430 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
431 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
432 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
434 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
435 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
436 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
437 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
439 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
441 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
442 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
443 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
444 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
446 #include "tcg-target.c.inc"
448 static void alloc_tcg_plugin_context(TCGContext
*s
)
451 s
->plugin_tb
= g_new0(struct qemu_plugin_tb
, 1);
452 s
->plugin_tb
->insns
=
453 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn
);
458 * All TCG threads except the parent (i.e. the one that called tcg_context_init
459 * and registered the target's TCG globals) must register with this function
460 * before initiating translation.
462 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
463 * of tcg_region_init() for the reasoning behind this.
465 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
466 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
467 * is not used anymore for translation once this function is called.
469 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
470 * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
472 #ifdef CONFIG_USER_ONLY
473 void tcg_register_thread(void)
475 tcg_ctx
= &tcg_init_ctx
;
478 void tcg_register_thread(void)
480 TCGContext
*s
= g_malloc(sizeof(*s
));
485 /* Relink mem_base. */
486 for (i
= 0, n
= tcg_init_ctx
.nb_globals
; i
< n
; ++i
) {
487 if (tcg_init_ctx
.temps
[i
].mem_base
) {
488 ptrdiff_t b
= tcg_init_ctx
.temps
[i
].mem_base
- tcg_init_ctx
.temps
;
489 tcg_debug_assert(b
>= 0 && b
< n
);
490 s
->temps
[i
].mem_base
= &s
->temps
[b
];
494 /* Claim an entry in tcg_ctxs */
495 n
= qatomic_fetch_inc(&tcg_cur_ctxs
);
496 g_assert(n
< tcg_max_ctxs
);
497 qatomic_set(&tcg_ctxs
[n
], s
);
500 alloc_tcg_plugin_context(s
);
501 tcg_region_initial_alloc(s
);
506 #endif /* !CONFIG_USER_ONLY */
508 /* pool based memory allocation */
509 void *tcg_malloc_internal(TCGContext
*s
, int size
)
514 if (size
> TCG_POOL_CHUNK_SIZE
) {
515 /* big malloc: insert a new pool (XXX: could optimize) */
516 p
= g_malloc(sizeof(TCGPool
) + size
);
518 p
->next
= s
->pool_first_large
;
519 s
->pool_first_large
= p
;
530 pool_size
= TCG_POOL_CHUNK_SIZE
;
531 p
= g_malloc(sizeof(TCGPool
) + pool_size
);
534 if (s
->pool_current
) {
535 s
->pool_current
->next
= p
;
545 s
->pool_cur
= p
->data
+ size
;
546 s
->pool_end
= p
->data
+ p
->size
;
550 void tcg_pool_reset(TCGContext
*s
)
553 for (p
= s
->pool_first_large
; p
; p
= t
) {
557 s
->pool_first_large
= NULL
;
558 s
->pool_cur
= s
->pool_end
= NULL
;
559 s
->pool_current
= NULL
;
562 #include "exec/helper-proto.h"
564 static TCGHelperInfo all_helpers
[] = {
565 #include "exec/helper-tcg.h"
567 static GHashTable
*helper_table
;
569 #ifdef CONFIG_TCG_INTERPRETER
570 static ffi_type
*typecode_to_ffi(int argmask
)
573 * libffi does not support __int128_t, so we have forced Int128
574 * to use the structure definition instead of the builtin type.
576 static ffi_type
*ffi_type_i128_elements
[3] = {
581 static ffi_type ffi_type_i128
= {
583 .alignment
= __alignof__(Int128
),
584 .type
= FFI_TYPE_STRUCT
,
585 .elements
= ffi_type_i128_elements
,
589 case dh_typecode_void
:
590 return &ffi_type_void
;
591 case dh_typecode_i32
:
592 return &ffi_type_uint32
;
593 case dh_typecode_s32
:
594 return &ffi_type_sint32
;
595 case dh_typecode_i64
:
596 return &ffi_type_uint64
;
597 case dh_typecode_s64
:
598 return &ffi_type_sint64
;
599 case dh_typecode_ptr
:
600 return &ffi_type_pointer
;
601 case dh_typecode_i128
:
602 return &ffi_type_i128
;
604 g_assert_not_reached();
607 static void init_ffi_layouts(void)
609 /* g_direct_hash/equal for direct comparisons on uint32_t. */
610 GHashTable
*ffi_table
= g_hash_table_new(NULL
, NULL
);
612 for (int i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
613 TCGHelperInfo
*info
= &all_helpers
[i
];
614 unsigned typemask
= info
->typemask
;
615 gpointer hash
= (gpointer
)(uintptr_t)typemask
;
624 cif
= g_hash_table_lookup(ffi_table
, hash
);
630 /* Ignoring the return type, find the last non-zero field. */
631 nargs
= 32 - clz32(typemask
>> 3);
632 nargs
= DIV_ROUND_UP(nargs
, 3);
633 assert(nargs
<= MAX_CALL_IARGS
);
635 ca
= g_malloc0(sizeof(*ca
) + nargs
* sizeof(ffi_type
*));
636 ca
->cif
.rtype
= typecode_to_ffi(typemask
& 7);
637 ca
->cif
.nargs
= nargs
;
640 ca
->cif
.arg_types
= ca
->args
;
641 for (int j
= 0; j
< nargs
; ++j
) {
642 int typecode
= extract32(typemask
, (j
+ 1) * 3, 3);
643 ca
->args
[j
] = typecode_to_ffi(typecode
);
647 status
= ffi_prep_cif(&ca
->cif
, FFI_DEFAULT_ABI
, nargs
,
648 ca
->cif
.rtype
, ca
->cif
.arg_types
);
649 assert(status
== FFI_OK
);
653 g_hash_table_insert(ffi_table
, hash
, (gpointer
)cif
);
656 g_hash_table_destroy(ffi_table
);
658 #endif /* CONFIG_TCG_INTERPRETER */
660 typedef struct TCGCumulativeArgs
{
661 int arg_idx
; /* tcg_gen_callN args[] */
662 int info_in_idx
; /* TCGHelperInfo in[] */
663 int arg_slot
; /* regs+stack slot */
664 int ref_slot
; /* stack slots for references */
667 static void layout_arg_even(TCGCumulativeArgs
*cum
)
669 cum
->arg_slot
+= cum
->arg_slot
& 1;
672 static void layout_arg_1(TCGCumulativeArgs
*cum
, TCGHelperInfo
*info
,
673 TCGCallArgumentKind kind
)
675 TCGCallArgumentLoc
*loc
= &info
->in
[cum
->info_in_idx
];
677 *loc
= (TCGCallArgumentLoc
){
679 .arg_idx
= cum
->arg_idx
,
680 .arg_slot
= cum
->arg_slot
,
686 static void layout_arg_normal_n(TCGCumulativeArgs
*cum
,
687 TCGHelperInfo
*info
, int n
)
689 TCGCallArgumentLoc
*loc
= &info
->in
[cum
->info_in_idx
];
691 for (int i
= 0; i
< n
; ++i
) {
692 /* Layout all using the same arg_idx, adjusting the subindex. */
693 loc
[i
] = (TCGCallArgumentLoc
){
694 .kind
= TCG_CALL_ARG_NORMAL
,
695 .arg_idx
= cum
->arg_idx
,
697 .arg_slot
= cum
->arg_slot
+ i
,
700 cum
->info_in_idx
+= n
;
704 static void layout_arg_by_ref(TCGCumulativeArgs
*cum
, TCGHelperInfo
*info
)
706 TCGCallArgumentLoc
*loc
= &info
->in
[cum
->info_in_idx
];
707 int n
= 128 / TCG_TARGET_REG_BITS
;
709 /* The first subindex carries the pointer. */
710 layout_arg_1(cum
, info
, TCG_CALL_ARG_BY_REF
);
713 * The callee is allowed to clobber memory associated with
714 * structure pass by-reference. Therefore we must make copies.
715 * Allocate space from "ref_slot", which will be adjusted to
716 * follow the parameters on the stack.
718 loc
[0].ref_slot
= cum
->ref_slot
;
721 * Subsequent words also go into the reference slot, but
722 * do not accumulate into the regular arguments.
724 for (int i
= 1; i
< n
; ++i
) {
725 loc
[i
] = (TCGCallArgumentLoc
){
726 .kind
= TCG_CALL_ARG_BY_REF_N
,
727 .arg_idx
= cum
->arg_idx
,
729 .ref_slot
= cum
->ref_slot
+ i
,
732 cum
->info_in_idx
+= n
;
736 static void init_call_layout(TCGHelperInfo
*info
)
738 int max_reg_slots
= ARRAY_SIZE(tcg_target_call_iarg_regs
);
739 int max_stk_slots
= TCG_STATIC_CALL_ARGS_SIZE
/ sizeof(tcg_target_long
);
740 unsigned typemask
= info
->typemask
;
742 TCGCumulativeArgs cum
= { };
745 * Parse and place any function return value.
747 typecode
= typemask
& 7;
749 case dh_typecode_void
:
752 case dh_typecode_i32
:
753 case dh_typecode_s32
:
754 case dh_typecode_ptr
:
756 info
->out_kind
= TCG_CALL_RET_NORMAL
;
758 case dh_typecode_i64
:
759 case dh_typecode_s64
:
760 info
->nr_out
= 64 / TCG_TARGET_REG_BITS
;
761 info
->out_kind
= TCG_CALL_RET_NORMAL
;
762 /* Query the last register now to trigger any assert early. */
763 tcg_target_call_oarg_reg(info
->out_kind
, info
->nr_out
- 1);
765 case dh_typecode_i128
:
766 info
->nr_out
= 128 / TCG_TARGET_REG_BITS
;
767 info
->out_kind
= TCG_TARGET_CALL_RET_I128
;
768 switch (TCG_TARGET_CALL_RET_I128
) {
769 case TCG_CALL_RET_NORMAL
:
770 /* Query the last register now to trigger any assert early. */
771 tcg_target_call_oarg_reg(info
->out_kind
, info
->nr_out
- 1);
773 case TCG_CALL_RET_BY_VEC
:
774 /* Query the single register now to trigger any assert early. */
775 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC
, 0);
777 case TCG_CALL_RET_BY_REF
:
779 * Allocate the first argument to the output.
780 * We don't need to store this anywhere, just make it
781 * unavailable for use in the input loop below.
786 qemu_build_not_reached();
790 g_assert_not_reached();
794 * Parse and place function arguments.
796 for (typemask
>>= 3; typemask
; typemask
>>= 3, cum
.arg_idx
++) {
797 TCGCallArgumentKind kind
;
800 typecode
= typemask
& 7;
802 case dh_typecode_i32
:
803 case dh_typecode_s32
:
806 case dh_typecode_i64
:
807 case dh_typecode_s64
:
810 case dh_typecode_ptr
:
813 case dh_typecode_i128
:
814 type
= TCG_TYPE_I128
;
817 g_assert_not_reached();
822 switch (TCG_TARGET_CALL_ARG_I32
) {
823 case TCG_CALL_ARG_EVEN
:
824 layout_arg_even(&cum
);
826 case TCG_CALL_ARG_NORMAL
:
827 layout_arg_1(&cum
, info
, TCG_CALL_ARG_NORMAL
);
829 case TCG_CALL_ARG_EXTEND
:
830 kind
= TCG_CALL_ARG_EXTEND_U
+ (typecode
& 1);
831 layout_arg_1(&cum
, info
, kind
);
834 qemu_build_not_reached();
839 switch (TCG_TARGET_CALL_ARG_I64
) {
840 case TCG_CALL_ARG_EVEN
:
841 layout_arg_even(&cum
);
843 case TCG_CALL_ARG_NORMAL
:
844 if (TCG_TARGET_REG_BITS
== 32) {
845 layout_arg_normal_n(&cum
, info
, 2);
847 layout_arg_1(&cum
, info
, TCG_CALL_ARG_NORMAL
);
851 qemu_build_not_reached();
856 switch (TCG_TARGET_CALL_ARG_I128
) {
857 case TCG_CALL_ARG_EVEN
:
858 layout_arg_even(&cum
);
860 case TCG_CALL_ARG_NORMAL
:
861 layout_arg_normal_n(&cum
, info
, 128 / TCG_TARGET_REG_BITS
);
863 case TCG_CALL_ARG_BY_REF
:
864 layout_arg_by_ref(&cum
, info
);
867 qemu_build_not_reached();
872 g_assert_not_reached();
875 info
->nr_in
= cum
.info_in_idx
;
877 /* Validate that we didn't overrun the input array. */
878 assert(cum
.info_in_idx
<= ARRAY_SIZE(info
->in
));
879 /* Validate the backend has enough argument space. */
880 assert(cum
.arg_slot
<= max_reg_slots
+ max_stk_slots
);
883 * Relocate the "ref_slot" area to the end of the parameters.
884 * Minimizing this stack offset helps code size for x86,
885 * which has a signed 8-bit offset encoding.
887 if (cum
.ref_slot
!= 0) {
890 if (cum
.arg_slot
> max_reg_slots
) {
891 int align
= __alignof(Int128
) / sizeof(tcg_target_long
);
893 ref_base
= cum
.arg_slot
- max_reg_slots
;
895 ref_base
= ROUND_UP(ref_base
, align
);
898 assert(ref_base
+ cum
.ref_slot
<= max_stk_slots
);
901 for (int i
= cum
.info_in_idx
- 1; i
>= 0; --i
) {
902 TCGCallArgumentLoc
*loc
= &info
->in
[i
];
904 case TCG_CALL_ARG_BY_REF
:
905 case TCG_CALL_ARG_BY_REF_N
:
906 loc
->ref_slot
+= ref_base
;
916 static int indirect_reg_alloc_order
[ARRAY_SIZE(tcg_target_reg_alloc_order
)];
917 static void process_op_defs(TCGContext
*s
);
918 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
919 TCGReg reg
, const char *name
);
921 static void tcg_context_init(unsigned max_cpus
)
923 TCGContext
*s
= &tcg_init_ctx
;
924 int op
, total_args
, n
, i
;
926 TCGArgConstraint
*args_ct
;
929 memset(s
, 0, sizeof(*s
));
932 /* Count total number of arguments and allocate the corresponding
935 for(op
= 0; op
< NB_OPS
; op
++) {
936 def
= &tcg_op_defs
[op
];
937 n
= def
->nb_iargs
+ def
->nb_oargs
;
941 args_ct
= g_new0(TCGArgConstraint
, total_args
);
943 for(op
= 0; op
< NB_OPS
; op
++) {
944 def
= &tcg_op_defs
[op
];
945 def
->args_ct
= args_ct
;
946 n
= def
->nb_iargs
+ def
->nb_oargs
;
950 /* Register helpers. */
951 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
952 helper_table
= g_hash_table_new(NULL
, NULL
);
954 for (i
= 0; i
< ARRAY_SIZE(all_helpers
); ++i
) {
955 init_call_layout(&all_helpers
[i
]);
956 g_hash_table_insert(helper_table
, (gpointer
)all_helpers
[i
].func
,
957 (gpointer
)&all_helpers
[i
]);
960 #ifdef CONFIG_TCG_INTERPRETER
967 /* Reverse the order of the saved registers, assuming they're all at
968 the start of tcg_target_reg_alloc_order. */
969 for (n
= 0; n
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++n
) {
970 int r
= tcg_target_reg_alloc_order
[n
];
971 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, r
)) {
975 for (i
= 0; i
< n
; ++i
) {
976 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[n
- 1 - i
];
978 for (; i
< ARRAY_SIZE(tcg_target_reg_alloc_order
); ++i
) {
979 indirect_reg_alloc_order
[i
] = tcg_target_reg_alloc_order
[i
];
982 alloc_tcg_plugin_context(s
);
986 * In user-mode we simply share the init context among threads, since we
987 * use a single region. See the documentation tcg_region_init() for the
988 * reasoning behind this.
989 * In softmmu we will have at most max_cpus TCG threads.
991 #ifdef CONFIG_USER_ONLY
996 tcg_max_ctxs
= max_cpus
;
997 tcg_ctxs
= g_new0(TCGContext
*, max_cpus
);
1000 tcg_debug_assert(!tcg_regset_test_reg(s
->reserved_regs
, TCG_AREG0
));
1001 ts
= tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, TCG_AREG0
, "env");
1002 cpu_env
= temp_tcgv_ptr(ts
);
/*
 * Initialize the TCG subsystem: set up the initial context for up to
 * @max_cpus translation threads, then carve out the code-gen region
 * of @tb_size bytes (with optional split-wx mapping per @splitwx).
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1012 * Allocate TBs right before their corresponding translated code, making
1013 * sure that TBs and code are on different cache lines.
1015 TranslationBlock
*tcg_tb_alloc(TCGContext
*s
)
1017 uintptr_t align
= qemu_icache_linesize
;
1018 TranslationBlock
*tb
;
1022 tb
= (void *)ROUND_UP((uintptr_t)s
->code_gen_ptr
, align
);
1023 next
= (void *)ROUND_UP((uintptr_t)(tb
+ 1), align
);
1025 if (unlikely(next
> s
->code_gen_highwater
)) {
1026 if (tcg_region_alloc(s
)) {
1031 qatomic_set(&s
->code_gen_ptr
, next
);
1032 s
->data_gen_ptr
= NULL
;
1036 void tcg_prologue_init(TCGContext
*s
)
1038 size_t prologue_size
;
1040 s
->code_ptr
= s
->code_gen_ptr
;
1041 s
->code_buf
= s
->code_gen_ptr
;
1042 s
->data_gen_ptr
= NULL
;
1044 #ifndef CONFIG_TCG_INTERPRETER
1045 tcg_qemu_tb_exec
= (tcg_prologue_fn
*)tcg_splitwx_to_rx(s
->code_ptr
);
1048 #ifdef TCG_TARGET_NEED_POOL_LABELS
1049 s
->pool_labels
= NULL
;
1052 qemu_thread_jit_write();
1053 /* Generate the prologue. */
1054 tcg_target_qemu_prologue(s
);
1056 #ifdef TCG_TARGET_NEED_POOL_LABELS
1057 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1059 int result
= tcg_out_pool_finalize(s
);
1060 tcg_debug_assert(result
== 0);
1064 prologue_size
= tcg_current_code_size(s
);
1065 perf_report_prologue(s
->code_gen_ptr
, prologue_size
);
1067 #ifndef CONFIG_TCG_INTERPRETER
1068 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
1069 (uintptr_t)s
->code_buf
, prologue_size
);
1073 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM
)) {
1074 FILE *logfile
= qemu_log_trylock();
1076 fprintf(logfile
, "PROLOGUE: [size=%zu]\n", prologue_size
);
1077 if (s
->data_gen_ptr
) {
1078 size_t code_size
= s
->data_gen_ptr
- s
->code_gen_ptr
;
1079 size_t data_size
= prologue_size
- code_size
;
1082 disas(logfile
, s
->code_gen_ptr
, code_size
);
1084 for (i
= 0; i
< data_size
; i
+= sizeof(tcg_target_ulong
)) {
1085 if (sizeof(tcg_target_ulong
) == 8) {
1087 "0x%08" PRIxPTR
": .quad 0x%016" PRIx64
"\n",
1088 (uintptr_t)s
->data_gen_ptr
+ i
,
1089 *(uint64_t *)(s
->data_gen_ptr
+ i
));
1092 "0x%08" PRIxPTR
": .long 0x%08x\n",
1093 (uintptr_t)s
->data_gen_ptr
+ i
,
1094 *(uint32_t *)(s
->data_gen_ptr
+ i
));
1098 disas(logfile
, s
->code_gen_ptr
, prologue_size
);
1100 fprintf(logfile
, "\n");
1101 qemu_log_unlock(logfile
);
1106 #ifndef CONFIG_TCG_INTERPRETER
1108 * Assert that goto_ptr is implemented completely, setting an epilogue.
1109 * For tci, we use NULL as the signal to return from the interpreter,
1110 * so skip this check.
1112 tcg_debug_assert(tcg_code_gen_epilogue
!= NULL
);
1115 tcg_region_prologue_set(s
);
1118 void tcg_func_start(TCGContext
*s
)
1121 s
->nb_temps
= s
->nb_globals
;
1123 /* No temps have been previously allocated for size or locality. */
1124 memset(s
->free_temps
, 0, sizeof(s
->free_temps
));
1126 /* No constant temps have been previously allocated. */
1127 for (int i
= 0; i
< TCG_TYPE_COUNT
; ++i
) {
1128 if (s
->const_table
[i
]) {
1129 g_hash_table_remove_all(s
->const_table
[i
]);
1135 s
->current_frame_offset
= s
->frame_start
;
1137 #ifdef CONFIG_DEBUG_TCG
1138 s
->goto_tb_issue_mask
= 0;
1141 QTAILQ_INIT(&s
->ops
);
1142 QTAILQ_INIT(&s
->free_ops
);
1143 QSIMPLEQ_INIT(&s
->labels
);
1146 static TCGTemp
*tcg_temp_alloc(TCGContext
*s
)
1148 int n
= s
->nb_temps
++;
1150 if (n
>= TCG_MAX_TEMPS
) {
1151 tcg_raise_tb_overflow(s
);
1153 return memset(&s
->temps
[n
], 0, sizeof(TCGTemp
));
1156 static TCGTemp
*tcg_global_alloc(TCGContext
*s
)
1160 tcg_debug_assert(s
->nb_globals
== s
->nb_temps
);
1161 tcg_debug_assert(s
->nb_globals
< TCG_MAX_TEMPS
);
1163 ts
= tcg_temp_alloc(s
);
1164 ts
->kind
= TEMP_GLOBAL
;
1169 static TCGTemp
*tcg_global_reg_new_internal(TCGContext
*s
, TCGType type
,
1170 TCGReg reg
, const char *name
)
1174 if (TCG_TARGET_REG_BITS
== 32 && type
!= TCG_TYPE_I32
) {
1178 ts
= tcg_global_alloc(s
);
1179 ts
->base_type
= type
;
1181 ts
->kind
= TEMP_FIXED
;
1184 tcg_regset_set_reg(s
->reserved_regs
, reg
);
1189 void tcg_set_frame(TCGContext
*s
, TCGReg reg
, intptr_t start
, intptr_t size
)
1191 s
->frame_start
= start
;
1192 s
->frame_end
= start
+ size
;
1194 = tcg_global_reg_new_internal(s
, TCG_TYPE_PTR
, reg
, "_frame");
1197 TCGTemp
*tcg_global_mem_new_internal(TCGType type
, TCGv_ptr base
,
1198 intptr_t offset
, const char *name
)
1200 TCGContext
*s
= tcg_ctx
;
1201 TCGTemp
*base_ts
= tcgv_ptr_temp(base
);
1202 TCGTemp
*ts
= tcg_global_alloc(s
);
1203 int indirect_reg
= 0;
1205 switch (base_ts
->kind
) {
1209 /* We do not support double-indirect registers. */
1210 tcg_debug_assert(!base_ts
->indirect_reg
);
1211 base_ts
->indirect_base
= 1;
1212 s
->nb_indirects
+= (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
1217 g_assert_not_reached();
1220 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1221 TCGTemp
*ts2
= tcg_global_alloc(s
);
1224 ts
->base_type
= TCG_TYPE_I64
;
1225 ts
->type
= TCG_TYPE_I32
;
1226 ts
->indirect_reg
= indirect_reg
;
1227 ts
->mem_allocated
= 1;
1228 ts
->mem_base
= base_ts
;
1229 ts
->mem_offset
= offset
;
1230 pstrcpy(buf
, sizeof(buf
), name
);
1231 pstrcat(buf
, sizeof(buf
), "_0");
1232 ts
->name
= strdup(buf
);
1234 tcg_debug_assert(ts2
== ts
+ 1);
1235 ts2
->base_type
= TCG_TYPE_I64
;
1236 ts2
->type
= TCG_TYPE_I32
;
1237 ts2
->indirect_reg
= indirect_reg
;
1238 ts2
->mem_allocated
= 1;
1239 ts2
->mem_base
= base_ts
;
1240 ts2
->mem_offset
= offset
+ 4;
1241 ts2
->temp_subindex
= 1;
1242 pstrcpy(buf
, sizeof(buf
), name
);
1243 pstrcat(buf
, sizeof(buf
), "_1");
1244 ts2
->name
= strdup(buf
);
1246 ts
->base_type
= type
;
1248 ts
->indirect_reg
= indirect_reg
;
1249 ts
->mem_allocated
= 1;
1250 ts
->mem_base
= base_ts
;
1251 ts
->mem_offset
= offset
;
1257 TCGTemp
*tcg_temp_new_internal(TCGType type
, bool temp_local
)
1259 TCGContext
*s
= tcg_ctx
;
1260 TCGTempKind kind
= temp_local
? TEMP_LOCAL
: TEMP_NORMAL
;
1264 k
= type
+ (temp_local
? TCG_TYPE_COUNT
: 0);
1265 idx
= find_first_bit(s
->free_temps
[k
].l
, TCG_MAX_TEMPS
);
1266 if (idx
< TCG_MAX_TEMPS
) {
1267 /* There is already an available temp with the right type. */
1268 clear_bit(idx
, s
->free_temps
[k
].l
);
1270 ts
= &s
->temps
[idx
];
1271 ts
->temp_allocated
= 1;
1272 tcg_debug_assert(ts
->base_type
== type
);
1273 tcg_debug_assert(ts
->kind
== kind
);
1285 n
= 64 / TCG_TARGET_REG_BITS
;
1288 n
= 128 / TCG_TARGET_REG_BITS
;
1291 g_assert_not_reached();
1294 ts
= tcg_temp_alloc(s
);
1295 ts
->base_type
= type
;
1296 ts
->temp_allocated
= 1;
1302 ts
->type
= TCG_TYPE_REG
;
1304 for (i
= 1; i
< n
; ++i
) {
1305 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1307 tcg_debug_assert(ts2
== ts
+ i
);
1308 ts2
->base_type
= type
;
1309 ts2
->type
= TCG_TYPE_REG
;
1310 ts2
->temp_allocated
= 1;
1311 ts2
->temp_subindex
= i
;
1317 #if defined(CONFIG_DEBUG_TCG)
1323 TCGv_vec
tcg_temp_new_vec(TCGType type
)
1327 #ifdef CONFIG_DEBUG_TCG
1330 assert(TCG_TARGET_HAS_v64
);
1333 assert(TCG_TARGET_HAS_v128
);
1336 assert(TCG_TARGET_HAS_v256
);
1339 g_assert_not_reached();
1343 t
= tcg_temp_new_internal(type
, 0);
1344 return temp_tcgv_vec(t
);
1347 /* Create a new temp of the same type as an existing temp. */
1348 TCGv_vec
tcg_temp_new_vec_matching(TCGv_vec match
)
1350 TCGTemp
*t
= tcgv_vec_temp(match
);
1352 tcg_debug_assert(t
->temp_allocated
!= 0);
1354 t
= tcg_temp_new_internal(t
->base_type
, 0);
1355 return temp_tcgv_vec(t
);
1358 void tcg_temp_free_internal(TCGTemp
*ts
)
1360 TCGContext
*s
= tcg_ctx
;
1366 * In order to simplify users of tcg_constant_*,
1367 * silently ignore free.
1374 g_assert_not_reached();
1377 #if defined(CONFIG_DEBUG_TCG)
1379 if (s
->temps_in_use
< 0) {
1380 fprintf(stderr
, "More temporaries freed than allocated!\n");
1384 tcg_debug_assert(ts
->temp_allocated
!= 0);
1385 ts
->temp_allocated
= 0;
1388 k
= ts
->base_type
+ (ts
->kind
== TEMP_NORMAL
? 0 : TCG_TYPE_COUNT
);
1389 set_bit(idx
, s
->free_temps
[k
].l
);
1392 TCGTemp
*tcg_constant_internal(TCGType type
, int64_t val
)
1394 TCGContext
*s
= tcg_ctx
;
1395 GHashTable
*h
= s
->const_table
[type
];
1399 h
= g_hash_table_new(g_int64_hash
, g_int64_equal
);
1400 s
->const_table
[type
] = h
;
1403 ts
= g_hash_table_lookup(h
, &val
);
1407 ts
= tcg_temp_alloc(s
);
1409 if (TCG_TARGET_REG_BITS
== 32 && type
== TCG_TYPE_I64
) {
1410 TCGTemp
*ts2
= tcg_temp_alloc(s
);
1412 tcg_debug_assert(ts2
== ts
+ 1);
1414 ts
->base_type
= TCG_TYPE_I64
;
1415 ts
->type
= TCG_TYPE_I32
;
1416 ts
->kind
= TEMP_CONST
;
1417 ts
->temp_allocated
= 1;
1419 ts2
->base_type
= TCG_TYPE_I64
;
1420 ts2
->type
= TCG_TYPE_I32
;
1421 ts2
->kind
= TEMP_CONST
;
1422 ts2
->temp_allocated
= 1;
1423 ts2
->temp_subindex
= 1;
1426 * Retain the full value of the 64-bit constant in the low
1427 * part, so that the hash table works. Actual uses will
1428 * truncate the value to the low part.
1430 ts
[HOST_BIG_ENDIAN
].val
= val
;
1431 ts
[!HOST_BIG_ENDIAN
].val
= val
>> 32;
1432 val_ptr
= &ts
[HOST_BIG_ENDIAN
].val
;
1434 ts
->base_type
= type
;
1436 ts
->kind
= TEMP_CONST
;
1437 ts
->temp_allocated
= 1;
1441 g_hash_table_insert(h
, val_ptr
, ts
);
1447 TCGv_vec
tcg_constant_vec(TCGType type
, unsigned vece
, int64_t val
)
1449 val
= dup_const(vece
, val
);
1450 return temp_tcgv_vec(tcg_constant_internal(type
, val
));
1453 TCGv_vec
tcg_constant_vec_matching(TCGv_vec match
, unsigned vece
, int64_t val
)
1455 TCGTemp
*t
= tcgv_vec_temp(match
);
1457 tcg_debug_assert(t
->temp_allocated
!= 0);
1458 return tcg_constant_vec(t
->base_type
, vece
, val
);
1461 TCGv_i32
tcg_const_i32(int32_t val
)
1464 t0
= tcg_temp_new_i32();
1465 tcg_gen_movi_i32(t0
, val
);
1469 TCGv_i64
tcg_const_i64(int64_t val
)
1472 t0
= tcg_temp_new_i64();
1473 tcg_gen_movi_i64(t0
, val
);
1477 TCGv_i32
tcg_const_local_i32(int32_t val
)
1480 t0
= tcg_temp_local_new_i32();
1481 tcg_gen_movi_i32(t0
, val
);
1485 TCGv_i64
tcg_const_local_i64(int64_t val
)
1488 t0
= tcg_temp_local_new_i64();
1489 tcg_gen_movi_i64(t0
, val
);
1493 #if defined(CONFIG_DEBUG_TCG)
1494 void tcg_clear_temp_count(void)
1496 TCGContext
*s
= tcg_ctx
;
1497 s
->temps_in_use
= 0;
1500 int tcg_check_temp_count(void)
1502 TCGContext
*s
= tcg_ctx
;
1503 if (s
->temps_in_use
) {
1504 /* Clear the count so that we don't give another
1505 * warning immediately next time around.
1507 s
->temps_in_use
= 0;
1514 /* Return true if OP may appear in the opcode stream.
1515 Test the runtime variable that controls each opcode. */
1516 bool tcg_op_supported(TCGOpcode op
)
1519 = TCG_TARGET_HAS_v64
| TCG_TARGET_HAS_v128
| TCG_TARGET_HAS_v256
;
1522 case INDEX_op_discard
:
1523 case INDEX_op_set_label
:
1527 case INDEX_op_insn_start
:
1528 case INDEX_op_exit_tb
:
1529 case INDEX_op_goto_tb
:
1530 case INDEX_op_goto_ptr
:
1531 case INDEX_op_qemu_ld_i32
:
1532 case INDEX_op_qemu_st_i32
:
1533 case INDEX_op_qemu_ld_i64
:
1534 case INDEX_op_qemu_st_i64
:
1537 case INDEX_op_qemu_st8_i32
:
1538 return TCG_TARGET_HAS_qemu_st8_i32
;
1540 case INDEX_op_mov_i32
:
1541 case INDEX_op_setcond_i32
:
1542 case INDEX_op_brcond_i32
:
1543 case INDEX_op_ld8u_i32
:
1544 case INDEX_op_ld8s_i32
:
1545 case INDEX_op_ld16u_i32
:
1546 case INDEX_op_ld16s_i32
:
1547 case INDEX_op_ld_i32
:
1548 case INDEX_op_st8_i32
:
1549 case INDEX_op_st16_i32
:
1550 case INDEX_op_st_i32
:
1551 case INDEX_op_add_i32
:
1552 case INDEX_op_sub_i32
:
1553 case INDEX_op_mul_i32
:
1554 case INDEX_op_and_i32
:
1555 case INDEX_op_or_i32
:
1556 case INDEX_op_xor_i32
:
1557 case INDEX_op_shl_i32
:
1558 case INDEX_op_shr_i32
:
1559 case INDEX_op_sar_i32
:
1562 case INDEX_op_movcond_i32
:
1563 return TCG_TARGET_HAS_movcond_i32
;
1564 case INDEX_op_div_i32
:
1565 case INDEX_op_divu_i32
:
1566 return TCG_TARGET_HAS_div_i32
;
1567 case INDEX_op_rem_i32
:
1568 case INDEX_op_remu_i32
:
1569 return TCG_TARGET_HAS_rem_i32
;
1570 case INDEX_op_div2_i32
:
1571 case INDEX_op_divu2_i32
:
1572 return TCG_TARGET_HAS_div2_i32
;
1573 case INDEX_op_rotl_i32
:
1574 case INDEX_op_rotr_i32
:
1575 return TCG_TARGET_HAS_rot_i32
;
1576 case INDEX_op_deposit_i32
:
1577 return TCG_TARGET_HAS_deposit_i32
;
1578 case INDEX_op_extract_i32
:
1579 return TCG_TARGET_HAS_extract_i32
;
1580 case INDEX_op_sextract_i32
:
1581 return TCG_TARGET_HAS_sextract_i32
;
1582 case INDEX_op_extract2_i32
:
1583 return TCG_TARGET_HAS_extract2_i32
;
1584 case INDEX_op_add2_i32
:
1585 return TCG_TARGET_HAS_add2_i32
;
1586 case INDEX_op_sub2_i32
:
1587 return TCG_TARGET_HAS_sub2_i32
;
1588 case INDEX_op_mulu2_i32
:
1589 return TCG_TARGET_HAS_mulu2_i32
;
1590 case INDEX_op_muls2_i32
:
1591 return TCG_TARGET_HAS_muls2_i32
;
1592 case INDEX_op_muluh_i32
:
1593 return TCG_TARGET_HAS_muluh_i32
;
1594 case INDEX_op_mulsh_i32
:
1595 return TCG_TARGET_HAS_mulsh_i32
;
1596 case INDEX_op_ext8s_i32
:
1597 return TCG_TARGET_HAS_ext8s_i32
;
1598 case INDEX_op_ext16s_i32
:
1599 return TCG_TARGET_HAS_ext16s_i32
;
1600 case INDEX_op_ext8u_i32
:
1601 return TCG_TARGET_HAS_ext8u_i32
;
1602 case INDEX_op_ext16u_i32
:
1603 return TCG_TARGET_HAS_ext16u_i32
;
1604 case INDEX_op_bswap16_i32
:
1605 return TCG_TARGET_HAS_bswap16_i32
;
1606 case INDEX_op_bswap32_i32
:
1607 return TCG_TARGET_HAS_bswap32_i32
;
1608 case INDEX_op_not_i32
:
1609 return TCG_TARGET_HAS_not_i32
;
1610 case INDEX_op_neg_i32
:
1611 return TCG_TARGET_HAS_neg_i32
;
1612 case INDEX_op_andc_i32
:
1613 return TCG_TARGET_HAS_andc_i32
;
1614 case INDEX_op_orc_i32
:
1615 return TCG_TARGET_HAS_orc_i32
;
1616 case INDEX_op_eqv_i32
:
1617 return TCG_TARGET_HAS_eqv_i32
;
1618 case INDEX_op_nand_i32
:
1619 return TCG_TARGET_HAS_nand_i32
;
1620 case INDEX_op_nor_i32
:
1621 return TCG_TARGET_HAS_nor_i32
;
1622 case INDEX_op_clz_i32
:
1623 return TCG_TARGET_HAS_clz_i32
;
1624 case INDEX_op_ctz_i32
:
1625 return TCG_TARGET_HAS_ctz_i32
;
1626 case INDEX_op_ctpop_i32
:
1627 return TCG_TARGET_HAS_ctpop_i32
;
1629 case INDEX_op_brcond2_i32
:
1630 case INDEX_op_setcond2_i32
:
1631 return TCG_TARGET_REG_BITS
== 32;
1633 case INDEX_op_mov_i64
:
1634 case INDEX_op_setcond_i64
:
1635 case INDEX_op_brcond_i64
:
1636 case INDEX_op_ld8u_i64
:
1637 case INDEX_op_ld8s_i64
:
1638 case INDEX_op_ld16u_i64
:
1639 case INDEX_op_ld16s_i64
:
1640 case INDEX_op_ld32u_i64
:
1641 case INDEX_op_ld32s_i64
:
1642 case INDEX_op_ld_i64
:
1643 case INDEX_op_st8_i64
:
1644 case INDEX_op_st16_i64
:
1645 case INDEX_op_st32_i64
:
1646 case INDEX_op_st_i64
:
1647 case INDEX_op_add_i64
:
1648 case INDEX_op_sub_i64
:
1649 case INDEX_op_mul_i64
:
1650 case INDEX_op_and_i64
:
1651 case INDEX_op_or_i64
:
1652 case INDEX_op_xor_i64
:
1653 case INDEX_op_shl_i64
:
1654 case INDEX_op_shr_i64
:
1655 case INDEX_op_sar_i64
:
1656 case INDEX_op_ext_i32_i64
:
1657 case INDEX_op_extu_i32_i64
:
1658 return TCG_TARGET_REG_BITS
== 64;
1660 case INDEX_op_movcond_i64
:
1661 return TCG_TARGET_HAS_movcond_i64
;
1662 case INDEX_op_div_i64
:
1663 case INDEX_op_divu_i64
:
1664 return TCG_TARGET_HAS_div_i64
;
1665 case INDEX_op_rem_i64
:
1666 case INDEX_op_remu_i64
:
1667 return TCG_TARGET_HAS_rem_i64
;
1668 case INDEX_op_div2_i64
:
1669 case INDEX_op_divu2_i64
:
1670 return TCG_TARGET_HAS_div2_i64
;
1671 case INDEX_op_rotl_i64
:
1672 case INDEX_op_rotr_i64
:
1673 return TCG_TARGET_HAS_rot_i64
;
1674 case INDEX_op_deposit_i64
:
1675 return TCG_TARGET_HAS_deposit_i64
;
1676 case INDEX_op_extract_i64
:
1677 return TCG_TARGET_HAS_extract_i64
;
1678 case INDEX_op_sextract_i64
:
1679 return TCG_TARGET_HAS_sextract_i64
;
1680 case INDEX_op_extract2_i64
:
1681 return TCG_TARGET_HAS_extract2_i64
;
1682 case INDEX_op_extrl_i64_i32
:
1683 return TCG_TARGET_HAS_extrl_i64_i32
;
1684 case INDEX_op_extrh_i64_i32
:
1685 return TCG_TARGET_HAS_extrh_i64_i32
;
1686 case INDEX_op_ext8s_i64
:
1687 return TCG_TARGET_HAS_ext8s_i64
;
1688 case INDEX_op_ext16s_i64
:
1689 return TCG_TARGET_HAS_ext16s_i64
;
1690 case INDEX_op_ext32s_i64
:
1691 return TCG_TARGET_HAS_ext32s_i64
;
1692 case INDEX_op_ext8u_i64
:
1693 return TCG_TARGET_HAS_ext8u_i64
;
1694 case INDEX_op_ext16u_i64
:
1695 return TCG_TARGET_HAS_ext16u_i64
;
1696 case INDEX_op_ext32u_i64
:
1697 return TCG_TARGET_HAS_ext32u_i64
;
1698 case INDEX_op_bswap16_i64
:
1699 return TCG_TARGET_HAS_bswap16_i64
;
1700 case INDEX_op_bswap32_i64
:
1701 return TCG_TARGET_HAS_bswap32_i64
;
1702 case INDEX_op_bswap64_i64
:
1703 return TCG_TARGET_HAS_bswap64_i64
;
1704 case INDEX_op_not_i64
:
1705 return TCG_TARGET_HAS_not_i64
;
1706 case INDEX_op_neg_i64
:
1707 return TCG_TARGET_HAS_neg_i64
;
1708 case INDEX_op_andc_i64
:
1709 return TCG_TARGET_HAS_andc_i64
;
1710 case INDEX_op_orc_i64
:
1711 return TCG_TARGET_HAS_orc_i64
;
1712 case INDEX_op_eqv_i64
:
1713 return TCG_TARGET_HAS_eqv_i64
;
1714 case INDEX_op_nand_i64
:
1715 return TCG_TARGET_HAS_nand_i64
;
1716 case INDEX_op_nor_i64
:
1717 return TCG_TARGET_HAS_nor_i64
;
1718 case INDEX_op_clz_i64
:
1719 return TCG_TARGET_HAS_clz_i64
;
1720 case INDEX_op_ctz_i64
:
1721 return TCG_TARGET_HAS_ctz_i64
;
1722 case INDEX_op_ctpop_i64
:
1723 return TCG_TARGET_HAS_ctpop_i64
;
1724 case INDEX_op_add2_i64
:
1725 return TCG_TARGET_HAS_add2_i64
;
1726 case INDEX_op_sub2_i64
:
1727 return TCG_TARGET_HAS_sub2_i64
;
1728 case INDEX_op_mulu2_i64
:
1729 return TCG_TARGET_HAS_mulu2_i64
;
1730 case INDEX_op_muls2_i64
:
1731 return TCG_TARGET_HAS_muls2_i64
;
1732 case INDEX_op_muluh_i64
:
1733 return TCG_TARGET_HAS_muluh_i64
;
1734 case INDEX_op_mulsh_i64
:
1735 return TCG_TARGET_HAS_mulsh_i64
;
1737 case INDEX_op_mov_vec
:
1738 case INDEX_op_dup_vec
:
1739 case INDEX_op_dupm_vec
:
1740 case INDEX_op_ld_vec
:
1741 case INDEX_op_st_vec
:
1742 case INDEX_op_add_vec
:
1743 case INDEX_op_sub_vec
:
1744 case INDEX_op_and_vec
:
1745 case INDEX_op_or_vec
:
1746 case INDEX_op_xor_vec
:
1747 case INDEX_op_cmp_vec
:
1749 case INDEX_op_dup2_vec
:
1750 return have_vec
&& TCG_TARGET_REG_BITS
== 32;
1751 case INDEX_op_not_vec
:
1752 return have_vec
&& TCG_TARGET_HAS_not_vec
;
1753 case INDEX_op_neg_vec
:
1754 return have_vec
&& TCG_TARGET_HAS_neg_vec
;
1755 case INDEX_op_abs_vec
:
1756 return have_vec
&& TCG_TARGET_HAS_abs_vec
;
1757 case INDEX_op_andc_vec
:
1758 return have_vec
&& TCG_TARGET_HAS_andc_vec
;
1759 case INDEX_op_orc_vec
:
1760 return have_vec
&& TCG_TARGET_HAS_orc_vec
;
1761 case INDEX_op_nand_vec
:
1762 return have_vec
&& TCG_TARGET_HAS_nand_vec
;
1763 case INDEX_op_nor_vec
:
1764 return have_vec
&& TCG_TARGET_HAS_nor_vec
;
1765 case INDEX_op_eqv_vec
:
1766 return have_vec
&& TCG_TARGET_HAS_eqv_vec
;
1767 case INDEX_op_mul_vec
:
1768 return have_vec
&& TCG_TARGET_HAS_mul_vec
;
1769 case INDEX_op_shli_vec
:
1770 case INDEX_op_shri_vec
:
1771 case INDEX_op_sari_vec
:
1772 return have_vec
&& TCG_TARGET_HAS_shi_vec
;
1773 case INDEX_op_shls_vec
:
1774 case INDEX_op_shrs_vec
:
1775 case INDEX_op_sars_vec
:
1776 return have_vec
&& TCG_TARGET_HAS_shs_vec
;
1777 case INDEX_op_shlv_vec
:
1778 case INDEX_op_shrv_vec
:
1779 case INDEX_op_sarv_vec
:
1780 return have_vec
&& TCG_TARGET_HAS_shv_vec
;
1781 case INDEX_op_rotli_vec
:
1782 return have_vec
&& TCG_TARGET_HAS_roti_vec
;
1783 case INDEX_op_rotls_vec
:
1784 return have_vec
&& TCG_TARGET_HAS_rots_vec
;
1785 case INDEX_op_rotlv_vec
:
1786 case INDEX_op_rotrv_vec
:
1787 return have_vec
&& TCG_TARGET_HAS_rotv_vec
;
1788 case INDEX_op_ssadd_vec
:
1789 case INDEX_op_usadd_vec
:
1790 case INDEX_op_sssub_vec
:
1791 case INDEX_op_ussub_vec
:
1792 return have_vec
&& TCG_TARGET_HAS_sat_vec
;
1793 case INDEX_op_smin_vec
:
1794 case INDEX_op_umin_vec
:
1795 case INDEX_op_smax_vec
:
1796 case INDEX_op_umax_vec
:
1797 return have_vec
&& TCG_TARGET_HAS_minmax_vec
;
1798 case INDEX_op_bitsel_vec
:
1799 return have_vec
&& TCG_TARGET_HAS_bitsel_vec
;
1800 case INDEX_op_cmpsel_vec
:
1801 return have_vec
&& TCG_TARGET_HAS_cmpsel_vec
;
1804 tcg_debug_assert(op
> INDEX_op_last_generic
&& op
< NB_OPS
);
1809 static TCGOp
*tcg_op_alloc(TCGOpcode opc
, unsigned nargs
);
1811 void tcg_gen_callN(void *func
, TCGTemp
*ret
, int nargs
, TCGTemp
**args
)
1813 const TCGHelperInfo
*info
;
1814 TCGv_i64 extend_free
[MAX_CALL_IARGS
];
1817 int i
, n
, pi
= 0, total_args
;
1819 info
= g_hash_table_lookup(helper_table
, (gpointer
)func
);
1820 total_args
= info
->nr_out
+ info
->nr_in
+ 2;
1821 op
= tcg_op_alloc(INDEX_op_call
, total_args
);
1823 #ifdef CONFIG_PLUGIN
1824 /* Flag helpers that may affect guest state */
1825 if (tcg_ctx
->plugin_insn
&&
1826 !(info
->flags
& TCG_CALL_PLUGIN
) &&
1827 !(info
->flags
& TCG_CALL_NO_SIDE_EFFECTS
)) {
1828 tcg_ctx
->plugin_insn
->calls_helpers
= true;
1832 TCGOP_CALLO(op
) = n
= info
->nr_out
;
1835 tcg_debug_assert(ret
== NULL
);
1838 tcg_debug_assert(ret
!= NULL
);
1839 op
->args
[pi
++] = temp_arg(ret
);
1843 tcg_debug_assert(ret
!= NULL
);
1844 tcg_debug_assert(ret
->base_type
== ret
->type
+ ctz32(n
));
1845 tcg_debug_assert(ret
->temp_subindex
== 0);
1846 for (i
= 0; i
< n
; ++i
) {
1847 op
->args
[pi
++] = temp_arg(ret
+ i
);
1851 g_assert_not_reached();
1854 TCGOP_CALLI(op
) = n
= info
->nr_in
;
1855 for (i
= 0; i
< n
; i
++) {
1856 const TCGCallArgumentLoc
*loc
= &info
->in
[i
];
1857 TCGTemp
*ts
= args
[loc
->arg_idx
] + loc
->tmp_subindex
;
1859 switch (loc
->kind
) {
1860 case TCG_CALL_ARG_NORMAL
:
1861 case TCG_CALL_ARG_BY_REF
:
1862 case TCG_CALL_ARG_BY_REF_N
:
1863 op
->args
[pi
++] = temp_arg(ts
);
1866 case TCG_CALL_ARG_EXTEND_U
:
1867 case TCG_CALL_ARG_EXTEND_S
:
1869 TCGv_i64 temp
= tcg_temp_new_i64();
1870 TCGv_i32 orig
= temp_tcgv_i32(ts
);
1872 if (loc
->kind
== TCG_CALL_ARG_EXTEND_S
) {
1873 tcg_gen_ext_i32_i64(temp
, orig
);
1875 tcg_gen_extu_i32_i64(temp
, orig
);
1877 op
->args
[pi
++] = tcgv_i64_arg(temp
);
1878 extend_free
[n_extend
++] = temp
;
1883 g_assert_not_reached();
1886 op
->args
[pi
++] = (uintptr_t)func
;
1887 op
->args
[pi
++] = (uintptr_t)info
;
1888 tcg_debug_assert(pi
== total_args
);
1890 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
1892 tcg_debug_assert(n_extend
< ARRAY_SIZE(extend_free
));
1893 for (i
= 0; i
< n_extend
; ++i
) {
1894 tcg_temp_free_i64(extend_free
[i
]);
1898 static void tcg_reg_alloc_start(TCGContext
*s
)
1902 for (i
= 0, n
= s
->nb_temps
; i
< n
; i
++) {
1903 TCGTemp
*ts
= &s
->temps
[i
];
1904 TCGTempVal val
= TEMP_VAL_MEM
;
1908 val
= TEMP_VAL_CONST
;
1917 val
= TEMP_VAL_DEAD
;
1920 ts
->mem_allocated
= 0;
1923 g_assert_not_reached();
1928 memset(s
->reg_to_temp
, 0, sizeof(s
->reg_to_temp
));
1931 static char *tcg_get_arg_str_ptr(TCGContext
*s
, char *buf
, int buf_size
,
1934 int idx
= temp_idx(ts
);
1939 pstrcpy(buf
, buf_size
, ts
->name
);
1942 snprintf(buf
, buf_size
, "loc%d", idx
- s
->nb_globals
);
1945 snprintf(buf
, buf_size
, "ebb%d", idx
- s
->nb_globals
);
1948 snprintf(buf
, buf_size
, "tmp%d", idx
- s
->nb_globals
);
1953 snprintf(buf
, buf_size
, "$0x%x", (int32_t)ts
->val
);
1955 #if TCG_TARGET_REG_BITS > 32
1957 snprintf(buf
, buf_size
, "$0x%" PRIx64
, ts
->val
);
1963 snprintf(buf
, buf_size
, "v%d$0x%" PRIx64
,
1964 64 << (ts
->type
- TCG_TYPE_V64
), ts
->val
);
1967 g_assert_not_reached();
1974 static char *tcg_get_arg_str(TCGContext
*s
, char *buf
,
1975 int buf_size
, TCGArg arg
)
1977 return tcg_get_arg_str_ptr(s
, buf
, buf_size
, arg_temp(arg
));
1980 static const char * const cond_name
[] =
1982 [TCG_COND_NEVER
] = "never",
1983 [TCG_COND_ALWAYS
] = "always",
1984 [TCG_COND_EQ
] = "eq",
1985 [TCG_COND_NE
] = "ne",
1986 [TCG_COND_LT
] = "lt",
1987 [TCG_COND_GE
] = "ge",
1988 [TCG_COND_LE
] = "le",
1989 [TCG_COND_GT
] = "gt",
1990 [TCG_COND_LTU
] = "ltu",
1991 [TCG_COND_GEU
] = "geu",
1992 [TCG_COND_LEU
] = "leu",
1993 [TCG_COND_GTU
] = "gtu"
1996 static const char * const ldst_name
[] =
2012 static const char * const alignment_name
[(MO_AMASK
>> MO_ASHIFT
) + 1] = {
2013 #ifdef TARGET_ALIGNED_ONLY
2014 [MO_UNALN
>> MO_ASHIFT
] = "un+",
2015 [MO_ALIGN
>> MO_ASHIFT
] = "",
2017 [MO_UNALN
>> MO_ASHIFT
] = "",
2018 [MO_ALIGN
>> MO_ASHIFT
] = "al+",
2020 [MO_ALIGN_2
>> MO_ASHIFT
] = "al2+",
2021 [MO_ALIGN_4
>> MO_ASHIFT
] = "al4+",
2022 [MO_ALIGN_8
>> MO_ASHIFT
] = "al8+",
2023 [MO_ALIGN_16
>> MO_ASHIFT
] = "al16+",
2024 [MO_ALIGN_32
>> MO_ASHIFT
] = "al32+",
2025 [MO_ALIGN_64
>> MO_ASHIFT
] = "al64+",
2028 static const char bswap_flag_name
[][6] = {
2029 [TCG_BSWAP_IZ
] = "iz",
2030 [TCG_BSWAP_OZ
] = "oz",
2031 [TCG_BSWAP_OS
] = "os",
2032 [TCG_BSWAP_IZ
| TCG_BSWAP_OZ
] = "iz,oz",
2033 [TCG_BSWAP_IZ
| TCG_BSWAP_OS
] = "iz,os",
2036 static inline bool tcg_regset_single(TCGRegSet d
)
2038 return (d
& (d
- 1)) == 0;
2041 static inline TCGReg
tcg_regset_first(TCGRegSet d
)
2043 if (TCG_TARGET_NB_REGS
<= 32) {
2050 /* Return only the number of characters output -- no error return. */
2051 #define ne_fprintf(...) \
2052 ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2054 static void tcg_dump_ops(TCGContext
*s
, FILE *f
, bool have_prefs
)
2059 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
2060 int i
, k
, nb_oargs
, nb_iargs
, nb_cargs
;
2061 const TCGOpDef
*def
;
2066 def
= &tcg_op_defs
[c
];
2068 if (c
== INDEX_op_insn_start
) {
2070 col
+= ne_fprintf(f
, "\n ----");
2072 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
2074 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2075 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
2079 col
+= ne_fprintf(f
, " " TARGET_FMT_lx
, a
);
2081 } else if (c
== INDEX_op_call
) {
2082 const TCGHelperInfo
*info
= tcg_call_info(op
);
2083 void *func
= tcg_call_func(op
);
2085 /* variable number of arguments */
2086 nb_oargs
= TCGOP_CALLO(op
);
2087 nb_iargs
= TCGOP_CALLI(op
);
2088 nb_cargs
= def
->nb_cargs
;
2090 col
+= ne_fprintf(f
, " %s ", def
->name
);
2093 * Print the function name from TCGHelperInfo, if available.
2094 * Note that plugins have a template function for the info,
2095 * but the actual function pointer comes from the plugin.
2097 if (func
== info
->func
) {
2098 col
+= ne_fprintf(f
, "%s", info
->name
);
2100 col
+= ne_fprintf(f
, "plugin(%p)", func
);
2103 col
+= ne_fprintf(f
, ",$0x%x,$%d", info
->flags
, nb_oargs
);
2104 for (i
= 0; i
< nb_oargs
; i
++) {
2105 col
+= ne_fprintf(f
, ",%s", tcg_get_arg_str(s
, buf
, sizeof(buf
),
2108 for (i
= 0; i
< nb_iargs
; i
++) {
2109 TCGArg arg
= op
->args
[nb_oargs
+ i
];
2110 const char *t
= tcg_get_arg_str(s
, buf
, sizeof(buf
), arg
);
2111 col
+= ne_fprintf(f
, ",%s", t
);
2114 col
+= ne_fprintf(f
, " %s ", def
->name
);
2116 nb_oargs
= def
->nb_oargs
;
2117 nb_iargs
= def
->nb_iargs
;
2118 nb_cargs
= def
->nb_cargs
;
2120 if (def
->flags
& TCG_OPF_VECTOR
) {
2121 col
+= ne_fprintf(f
, "v%d,e%d,", 64 << TCGOP_VECL(op
),
2122 8 << TCGOP_VECE(op
));
2126 for (i
= 0; i
< nb_oargs
; i
++) {
2127 const char *sep
= k
? "," : "";
2128 col
+= ne_fprintf(f
, "%s%s", sep
,
2129 tcg_get_arg_str(s
, buf
, sizeof(buf
),
2132 for (i
= 0; i
< nb_iargs
; i
++) {
2133 const char *sep
= k
? "," : "";
2134 col
+= ne_fprintf(f
, "%s%s", sep
,
2135 tcg_get_arg_str(s
, buf
, sizeof(buf
),
2139 case INDEX_op_brcond_i32
:
2140 case INDEX_op_setcond_i32
:
2141 case INDEX_op_movcond_i32
:
2142 case INDEX_op_brcond2_i32
:
2143 case INDEX_op_setcond2_i32
:
2144 case INDEX_op_brcond_i64
:
2145 case INDEX_op_setcond_i64
:
2146 case INDEX_op_movcond_i64
:
2147 case INDEX_op_cmp_vec
:
2148 case INDEX_op_cmpsel_vec
:
2149 if (op
->args
[k
] < ARRAY_SIZE(cond_name
)
2150 && cond_name
[op
->args
[k
]]) {
2151 col
+= ne_fprintf(f
, ",%s", cond_name
[op
->args
[k
++]]);
2153 col
+= ne_fprintf(f
, ",$0x%" TCG_PRIlx
, op
->args
[k
++]);
2157 case INDEX_op_qemu_ld_i32
:
2158 case INDEX_op_qemu_st_i32
:
2159 case INDEX_op_qemu_st8_i32
:
2160 case INDEX_op_qemu_ld_i64
:
2161 case INDEX_op_qemu_st_i64
:
2163 MemOpIdx oi
= op
->args
[k
++];
2164 MemOp op
= get_memop(oi
);
2165 unsigned ix
= get_mmuidx(oi
);
2167 if (op
& ~(MO_AMASK
| MO_BSWAP
| MO_SSIZE
)) {
2168 col
+= ne_fprintf(f
, ",$0x%x,%u", op
, ix
);
2170 const char *s_al
, *s_op
;
2171 s_al
= alignment_name
[(op
& MO_AMASK
) >> MO_ASHIFT
];
2172 s_op
= ldst_name
[op
& (MO_BSWAP
| MO_SSIZE
)];
2173 col
+= ne_fprintf(f
, ",%s%s,%u", s_al
, s_op
, ix
);
2178 case INDEX_op_bswap16_i32
:
2179 case INDEX_op_bswap16_i64
:
2180 case INDEX_op_bswap32_i32
:
2181 case INDEX_op_bswap32_i64
:
2182 case INDEX_op_bswap64_i64
:
2184 TCGArg flags
= op
->args
[k
];
2185 const char *name
= NULL
;
2187 if (flags
< ARRAY_SIZE(bswap_flag_name
)) {
2188 name
= bswap_flag_name
[flags
];
2191 col
+= ne_fprintf(f
, ",%s", name
);
2193 col
+= ne_fprintf(f
, ",$0x%" TCG_PRIlx
, flags
);
2203 case INDEX_op_set_label
:
2205 case INDEX_op_brcond_i32
:
2206 case INDEX_op_brcond_i64
:
2207 case INDEX_op_brcond2_i32
:
2208 col
+= ne_fprintf(f
, "%s$L%d", k
? "," : "",
2209 arg_label(op
->args
[k
])->id
);
2215 for (; i
< nb_cargs
; i
++, k
++) {
2216 col
+= ne_fprintf(f
, "%s$0x%" TCG_PRIlx
, k
? "," : "",
2221 if (have_prefs
|| op
->life
) {
2222 for (; col
< 40; ++col
) {
2228 unsigned life
= op
->life
;
2230 if (life
& (SYNC_ARG
* 3)) {
2231 ne_fprintf(f
, " sync:");
2232 for (i
= 0; i
< 2; ++i
) {
2233 if (life
& (SYNC_ARG
<< i
)) {
2234 ne_fprintf(f
, " %d", i
);
2240 ne_fprintf(f
, " dead:");
2241 for (i
= 0; life
; ++i
, life
>>= 1) {
2243 ne_fprintf(f
, " %d", i
);
2250 for (i
= 0; i
< nb_oargs
; ++i
) {
2251 TCGRegSet set
= output_pref(op
, i
);
2254 ne_fprintf(f
, " pref=");
2259 ne_fprintf(f
, "none");
2260 } else if (set
== MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS
)) {
2261 ne_fprintf(f
, "all");
2262 #ifdef CONFIG_DEBUG_TCG
2263 } else if (tcg_regset_single(set
)) {
2264 TCGReg reg
= tcg_regset_first(set
);
2265 ne_fprintf(f
, "%s", tcg_target_reg_names
[reg
]);
2267 } else if (TCG_TARGET_NB_REGS
<= 32) {
2268 ne_fprintf(f
, "0x%x", (uint32_t)set
);
2270 ne_fprintf(f
, "0x%" PRIx64
, (uint64_t)set
);
2279 /* we give more priority to constraints with less registers */
2280 static int get_constraint_priority(const TCGOpDef
*def
, int k
)
2282 const TCGArgConstraint
*arg_ct
= &def
->args_ct
[k
];
2283 int n
= ctpop64(arg_ct
->regs
);
2286 * Sort constraints of a single register first, which includes output
2287 * aliases (which must exactly match the input already allocated).
2289 if (n
== 1 || arg_ct
->oalias
) {
2294 * Sort register pairs next, first then second immediately after.
2295 * Arbitrarily sort multiple pairs by the index of the first reg;
2296 * there shouldn't be many pairs.
2298 switch (arg_ct
->pair
) {
2303 return (arg_ct
->pair_index
+ 1) * 2 - 1;
2306 /* Finally, sort by decreasing register count. */
2311 /* sort from highest priority to lowest */
2312 static void sort_constraints(TCGOpDef
*def
, int start
, int n
)
2315 TCGArgConstraint
*a
= def
->args_ct
;
2317 for (i
= 0; i
< n
; i
++) {
2318 a
[start
+ i
].sort_index
= start
+ i
;
2323 for (i
= 0; i
< n
- 1; i
++) {
2324 for (j
= i
+ 1; j
< n
; j
++) {
2325 int p1
= get_constraint_priority(def
, a
[start
+ i
].sort_index
);
2326 int p2
= get_constraint_priority(def
, a
[start
+ j
].sort_index
);
2328 int tmp
= a
[start
+ i
].sort_index
;
2329 a
[start
+ i
].sort_index
= a
[start
+ j
].sort_index
;
2330 a
[start
+ j
].sort_index
= tmp
;
2336 static void process_op_defs(TCGContext
*s
)
2340 for (op
= 0; op
< NB_OPS
; op
++) {
2341 TCGOpDef
*def
= &tcg_op_defs
[op
];
2342 const TCGTargetOpDef
*tdefs
;
2343 bool saw_alias_pair
= false;
2344 int i
, o
, i2
, o2
, nb_args
;
2346 if (def
->flags
& TCG_OPF_NOT_PRESENT
) {
2350 nb_args
= def
->nb_iargs
+ def
->nb_oargs
;
2356 * Macro magic should make it impossible, but double-check that
2357 * the array index is in range. Since the signness of an enum
2358 * is implementation defined, force the result to unsigned.
2360 unsigned con_set
= tcg_target_op_def(op
);
2361 tcg_debug_assert(con_set
< ARRAY_SIZE(constraint_sets
));
2362 tdefs
= &constraint_sets
[con_set
];
2364 for (i
= 0; i
< nb_args
; i
++) {
2365 const char *ct_str
= tdefs
->args_ct_str
[i
];
2366 bool input_p
= i
>= def
->nb_oargs
;
2368 /* Incomplete TCGTargetOpDef entry. */
2369 tcg_debug_assert(ct_str
!= NULL
);
2374 tcg_debug_assert(input_p
);
2375 tcg_debug_assert(o
< def
->nb_oargs
);
2376 tcg_debug_assert(def
->args_ct
[o
].regs
!= 0);
2377 tcg_debug_assert(!def
->args_ct
[o
].oalias
);
2378 def
->args_ct
[i
] = def
->args_ct
[o
];
2379 /* The output sets oalias. */
2380 def
->args_ct
[o
].oalias
= 1;
2381 def
->args_ct
[o
].alias_index
= i
;
2382 /* The input sets ialias. */
2383 def
->args_ct
[i
].ialias
= 1;
2384 def
->args_ct
[i
].alias_index
= o
;
2385 if (def
->args_ct
[i
].pair
) {
2386 saw_alias_pair
= true;
2388 tcg_debug_assert(ct_str
[1] == '\0');
2392 tcg_debug_assert(!input_p
);
2393 def
->args_ct
[i
].newreg
= true;
2397 case 'p': /* plus */
2398 /* Allocate to the register after the previous. */
2399 tcg_debug_assert(i
> (input_p
? def
->nb_oargs
: 0));
2401 tcg_debug_assert(!def
->args_ct
[o
].pair
);
2402 tcg_debug_assert(!def
->args_ct
[o
].ct
);
2403 def
->args_ct
[i
] = (TCGArgConstraint
){
2406 .regs
= def
->args_ct
[o
].regs
<< 1,
2408 def
->args_ct
[o
].pair
= 1;
2409 def
->args_ct
[o
].pair_index
= i
;
2410 tcg_debug_assert(ct_str
[1] == '\0');
2413 case 'm': /* minus */
2414 /* Allocate to the register before the previous. */
2415 tcg_debug_assert(i
> (input_p
? def
->nb_oargs
: 0));
2417 tcg_debug_assert(!def
->args_ct
[o
].pair
);
2418 tcg_debug_assert(!def
->args_ct
[o
].ct
);
2419 def
->args_ct
[i
] = (TCGArgConstraint
){
2422 .regs
= def
->args_ct
[o
].regs
>> 1,
2424 def
->args_ct
[o
].pair
= 2;
2425 def
->args_ct
[o
].pair_index
= i
;
2426 tcg_debug_assert(ct_str
[1] == '\0');
2433 def
->args_ct
[i
].ct
|= TCG_CT_CONST
;
2436 /* Include all of the target-specific constraints. */
2439 #define CONST(CASE, MASK) \
2440 case CASE: def->args_ct[i].ct |= MASK; break;
2441 #define REGS(CASE, MASK) \
2442 case CASE: def->args_ct[i].regs |= MASK; break;
2444 #include "tcg-target-con-str.h"
2453 /* Typo in TCGTargetOpDef constraint. */
2454 g_assert_not_reached();
2456 } while (*++ct_str
!= '\0');
2459 /* TCGTargetOpDef entry with too much information? */
2460 tcg_debug_assert(i
== TCG_MAX_OP_ARGS
|| tdefs
->args_ct_str
[i
] == NULL
);
2463 * Fix up output pairs that are aliased with inputs.
2464 * When we created the alias, we copied pair from the output.
2465 * There are three cases:
2466 * (1a) Pairs of inputs alias pairs of outputs.
2467 * (1b) One input aliases the first of a pair of outputs.
2468 * (2) One input aliases the second of a pair of outputs.
2470 * Case 1a is handled by making sure that the pair_index'es are
2471 * properly updated so that they appear the same as a pair of inputs.
2473 * Case 1b is handled by setting the pair_index of the input to
2474 * itself, simply so it doesn't point to an unrelated argument.
2475 * Since we don't encounter the "second" during the input allocation
2476 * phase, nothing happens with the second half of the input pair.
2478 * Case 2 is handled by setting the second input to pair=3, the
2479 * first output to pair=3, and the pair_index'es to match.
2481 if (saw_alias_pair
) {
2482 for (i
= def
->nb_oargs
; i
< nb_args
; i
++) {
2484 * Since [0-9pm] must be alone in the constraint string,
2485 * the only way they can both be set is if the pair comes
2486 * from the output alias.
2488 if (!def
->args_ct
[i
].ialias
) {
2491 switch (def
->args_ct
[i
].pair
) {
2495 o
= def
->args_ct
[i
].alias_index
;
2496 o2
= def
->args_ct
[o
].pair_index
;
2497 tcg_debug_assert(def
->args_ct
[o
].pair
== 1);
2498 tcg_debug_assert(def
->args_ct
[o2
].pair
== 2);
2499 if (def
->args_ct
[o2
].oalias
) {
2501 i2
= def
->args_ct
[o2
].alias_index
;
2502 tcg_debug_assert(def
->args_ct
[i2
].pair
== 2);
2503 def
->args_ct
[i2
].pair_index
= i
;
2504 def
->args_ct
[i
].pair_index
= i2
;
2507 def
->args_ct
[i
].pair_index
= i
;
2511 o
= def
->args_ct
[i
].alias_index
;
2512 o2
= def
->args_ct
[o
].pair_index
;
2513 tcg_debug_assert(def
->args_ct
[o
].pair
== 2);
2514 tcg_debug_assert(def
->args_ct
[o2
].pair
== 1);
2515 if (def
->args_ct
[o2
].oalias
) {
2517 i2
= def
->args_ct
[o2
].alias_index
;
2518 tcg_debug_assert(def
->args_ct
[i2
].pair
== 1);
2519 def
->args_ct
[i2
].pair_index
= i
;
2520 def
->args_ct
[i
].pair_index
= i2
;
2523 def
->args_ct
[i
].pair
= 3;
2524 def
->args_ct
[o2
].pair
= 3;
2525 def
->args_ct
[i
].pair_index
= o2
;
2526 def
->args_ct
[o2
].pair_index
= i
;
2530 g_assert_not_reached();
2535 /* sort the constraints (XXX: this is just an heuristic) */
2536 sort_constraints(def
, 0, def
->nb_oargs
);
2537 sort_constraints(def
, def
->nb_oargs
, def
->nb_iargs
);
2541 void tcg_op_remove(TCGContext
*s
, TCGOp
*op
)
2547 label
= arg_label(op
->args
[0]);
2550 case INDEX_op_brcond_i32
:
2551 case INDEX_op_brcond_i64
:
2552 label
= arg_label(op
->args
[3]);
2555 case INDEX_op_brcond2_i32
:
2556 label
= arg_label(op
->args
[5]);
2563 QTAILQ_REMOVE(&s
->ops
, op
, link
);
2564 QTAILQ_INSERT_TAIL(&s
->free_ops
, op
, link
);
2567 #ifdef CONFIG_PROFILER
2568 qatomic_set(&s
->prof
.del_op_count
, s
->prof
.del_op_count
+ 1);
2572 void tcg_remove_ops_after(TCGOp
*op
)
2574 TCGContext
*s
= tcg_ctx
;
2577 TCGOp
*last
= tcg_last_op();
2581 tcg_op_remove(s
, last
);
2585 static TCGOp
*tcg_op_alloc(TCGOpcode opc
, unsigned nargs
)
2587 TCGContext
*s
= tcg_ctx
;
2590 if (unlikely(!QTAILQ_EMPTY(&s
->free_ops
))) {
2591 QTAILQ_FOREACH(op
, &s
->free_ops
, link
) {
2592 if (nargs
<= op
->nargs
) {
2593 QTAILQ_REMOVE(&s
->free_ops
, op
, link
);
2600 /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2601 nargs
= MAX(4, nargs
);
2602 op
= tcg_malloc(sizeof(TCGOp
) + sizeof(TCGArg
) * nargs
);
2605 memset(op
, 0, offsetof(TCGOp
, link
));
2609 /* Check for bitfield overflow. */
2610 tcg_debug_assert(op
->nargs
== nargs
);
2616 TCGOp
*tcg_emit_op(TCGOpcode opc
, unsigned nargs
)
2618 TCGOp
*op
= tcg_op_alloc(opc
, nargs
);
2619 QTAILQ_INSERT_TAIL(&tcg_ctx
->ops
, op
, link
);
2623 TCGOp
*tcg_op_insert_before(TCGContext
*s
, TCGOp
*old_op
,
2624 TCGOpcode opc
, unsigned nargs
)
2626 TCGOp
*new_op
= tcg_op_alloc(opc
, nargs
);
2627 QTAILQ_INSERT_BEFORE(old_op
, new_op
, link
);
2631 TCGOp
*tcg_op_insert_after(TCGContext
*s
, TCGOp
*old_op
,
2632 TCGOpcode opc
, unsigned nargs
)
2634 TCGOp
*new_op
= tcg_op_alloc(opc
, nargs
);
2635 QTAILQ_INSERT_AFTER(&s
->ops
, old_op
, new_op
, link
);
/* Reachable analysis : remove unreachable code.  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    /*
     * NOTE(review): this pass was line-mangled in the source; the
     * 'dead'/'remove' flag plumbing and the INDEX_op_br case label were
     * reconstructed from upstream QEMU -- verify against this tree.
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;     /* default: remove iff currently unreachable */
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2715 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2716 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* state_ptr is aimed at the prefs[] array set up by liveness_pass_1. */
    return ts->state_ptr;
}
/* For liveness_pass_1, reset the preferences for a given temp to the
 * maximal regset for its type.
 */
static inline void la_reset_pref(TCGTemp *ts)
{
    /* A dead temp has no preference; a live one may use any reg of its type. */
    *la_temp_pref(ts)
        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
}
/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
{
    int i;

    /* Globals [0, ng): dead, but their canonical memory slot is current. */
    for (i = 0; i < ng; ++i) {
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
    /* Non-global temps [ng, nt): simply dead. */
    for (i = ng; i < nt; ++i) {
        s->temps[i].state = TS_DEAD;
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
{
    int i;

    for (i = 0; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        /*
         * NOTE(review): the case labels were lost in the mangled source
         * and reconstructed from upstream QEMU -- confirm they match this
         * tree's TCGTempKind enumerators (TEMP_TB vs. TEMP_LOCAL, etc.).
         */
        switch (ts->kind) {
        case TEMP_FIXED:
        case TEMP_GLOBAL:
        case TEMP_TB:
            /* Value must survive the block: dead, synced to memory. */
            state = TS_DEAD | TS_MEM;
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            /* Does not cross a basic block: plain dead. */
            state = TS_DEAD;
            break;
        default:
            g_assert_not_reached();
        }
        ts->state = state;
        la_reset_pref(ts);
    }
}
/* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; ++i) {
        int state = s->temps[i].state;
        /* Force a sync (TS_MEM) while preserving the dead/live bit. */
        s->temps[i].state = state | TS_MEM;
        if (state == TS_DEAD) {
            /* If the global was previously dead, reset prefs.  */
            la_reset_pref(&s->temps[i]);
        }
    }
}
/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        /*
         * NOTE(review): only the per-case bodies survived in the mangled
         * source; the case labels below were reconstructed from upstream
         * QEMU -- confirm against this tree's TCGTempKind.
         */
        switch (ts->kind) {
        case TEMP_TB:
            /* Survives the branch: sync to memory, keep live state. */
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                continue;       /* still live: keep current preferences */
            }
            break;
        case TEMP_EBB:
            s->temps[i].state = TS_DEAD;
            break;
        case TEMP_CONST:
            continue;           /* constants need no sync or prefs reset */
        default:
            g_assert_not_reached();
        }
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
{
    int i;

    for (i = 0; i < ng; i++) {
        /* Dead and synced: the in-register copy is invalidated. */
        s->temps[i].state = TS_DEAD | TS_MEM;
        la_reset_pref(&s->temps[i]);
    }
}
/* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
{
    /* Registers that survive a call. */
    TCGRegSet mask = ~tcg_target_call_clobber_regs;
    int i;

    for (i = 0; i < nt; i++) {
        TCGTemp *ts = &s->temps[i];
        if (!(ts->state & TS_DEAD)) {
            TCGRegSet *pset = la_temp_pref(ts);
            TCGRegSet set = *pset;

            /* Prefer call-saved registers for values live across the call. */
            set &= mask;
            /* If the combination is not possible, restart.  */
            if (set == 0) {
                set = tcg_target_available_regs[ts->type] & mask;
            }
            *pset = set;
        }
    }
}
2860 /* Liveness analysis : update the opc_arg_life array to tell if a
2861 given input arguments is dead. Instructions updating dead
2862 temporaries are removed. */
2863 static void liveness_pass_1(TCGContext
*s
)
2865 int nb_globals
= s
->nb_globals
;
2866 int nb_temps
= s
->nb_temps
;
2867 TCGOp
*op
, *op_prev
;
2871 prefs
= tcg_malloc(sizeof(TCGRegSet
) * nb_temps
);
2872 for (i
= 0; i
< nb_temps
; ++i
) {
2873 s
->temps
[i
].state_ptr
= prefs
+ i
;
2876 /* ??? Should be redundant with the exit_tb that ends the TB. */
2877 la_func_end(s
, nb_globals
, nb_temps
);
2879 QTAILQ_FOREACH_REVERSE_SAFE(op
, &s
->ops
, link
, op_prev
) {
2880 int nb_iargs
, nb_oargs
;
2881 TCGOpcode opc_new
, opc_new2
;
2883 TCGLifeData arg_life
= 0;
2885 TCGOpcode opc
= op
->opc
;
2886 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
2891 const TCGHelperInfo
*info
= tcg_call_info(op
);
2892 int call_flags
= tcg_call_flags(op
);
2894 nb_oargs
= TCGOP_CALLO(op
);
2895 nb_iargs
= TCGOP_CALLI(op
);
2897 /* pure functions can be removed if their result is unused */
2898 if (call_flags
& TCG_CALL_NO_SIDE_EFFECTS
) {
2899 for (i
= 0; i
< nb_oargs
; i
++) {
2900 ts
= arg_temp(op
->args
[i
]);
2901 if (ts
->state
!= TS_DEAD
) {
2902 goto do_not_remove_call
;
2909 /* Output args are dead. */
2910 for (i
= 0; i
< nb_oargs
; i
++) {
2911 ts
= arg_temp(op
->args
[i
]);
2912 if (ts
->state
& TS_DEAD
) {
2913 arg_life
|= DEAD_ARG
<< i
;
2915 if (ts
->state
& TS_MEM
) {
2916 arg_life
|= SYNC_ARG
<< i
;
2918 ts
->state
= TS_DEAD
;
2922 /* Not used -- it will be tcg_target_call_oarg_reg(). */
2923 memset(op
->output_pref
, 0, sizeof(op
->output_pref
));
2925 if (!(call_flags
& (TCG_CALL_NO_WRITE_GLOBALS
|
2926 TCG_CALL_NO_READ_GLOBALS
))) {
2927 la_global_kill(s
, nb_globals
);
2928 } else if (!(call_flags
& TCG_CALL_NO_READ_GLOBALS
)) {
2929 la_global_sync(s
, nb_globals
);
2932 /* Record arguments that die in this helper. */
2933 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
2934 ts
= arg_temp(op
->args
[i
]);
2935 if (ts
->state
& TS_DEAD
) {
2936 arg_life
|= DEAD_ARG
<< i
;
2940 /* For all live registers, remove call-clobbered prefs. */
2941 la_cross_call(s
, nb_temps
);
2944 * Input arguments are live for preceding opcodes.
2946 * For those arguments that die, and will be allocated in
2947 * registers, clear the register set for that arg, to be
2948 * filled in below. For args that will be on the stack,
2949 * reset to any available reg. Process arguments in reverse
2950 * order so that if a temp is used more than once, the stack
2951 * reset to max happens before the register reset to 0.
2953 for (i
= nb_iargs
- 1; i
>= 0; i
--) {
2954 const TCGCallArgumentLoc
*loc
= &info
->in
[i
];
2955 ts
= arg_temp(op
->args
[nb_oargs
+ i
]);
2957 if (ts
->state
& TS_DEAD
) {
2958 switch (loc
->kind
) {
2959 case TCG_CALL_ARG_NORMAL
:
2960 case TCG_CALL_ARG_EXTEND_U
:
2961 case TCG_CALL_ARG_EXTEND_S
:
2963 *la_temp_pref(ts
) = 0;
2969 tcg_target_available_regs
[ts
->type
];
2972 ts
->state
&= ~TS_DEAD
;
2977 * For each input argument, add its input register to prefs.
2978 * If a temp is used once, this produces a single set bit;
2979 * if a temp is used multiple times, this produces a set.
2981 for (i
= 0; i
< nb_iargs
; i
++) {
2982 const TCGCallArgumentLoc
*loc
= &info
->in
[i
];
2983 ts
= arg_temp(op
->args
[nb_oargs
+ i
]);
2985 switch (loc
->kind
) {
2986 case TCG_CALL_ARG_NORMAL
:
2987 case TCG_CALL_ARG_EXTEND_U
:
2988 case TCG_CALL_ARG_EXTEND_S
:
2990 tcg_regset_set_reg(*la_temp_pref(ts
),
2991 tcg_target_call_iarg_regs
[loc
->arg_slot
]);
3000 case INDEX_op_insn_start
:
3002 case INDEX_op_discard
:
3003 /* mark the temporary as dead */
3004 ts
= arg_temp(op
->args
[0]);
3005 ts
->state
= TS_DEAD
;
3009 case INDEX_op_add2_i32
:
3010 opc_new
= INDEX_op_add_i32
;
3012 case INDEX_op_sub2_i32
:
3013 opc_new
= INDEX_op_sub_i32
;
3015 case INDEX_op_add2_i64
:
3016 opc_new
= INDEX_op_add_i64
;
3018 case INDEX_op_sub2_i64
:
3019 opc_new
= INDEX_op_sub_i64
;
3023 /* Test if the high part of the operation is dead, but not
3024 the low part. The result can be optimized to a simple
3025 add or sub. This happens often for x86_64 guest when the
3026 cpu mode is set to 32 bit. */
3027 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3028 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3031 /* Replace the opcode and adjust the args in place,
3032 leaving 3 unused args at the end. */
3033 op
->opc
= opc
= opc_new
;
3034 op
->args
[1] = op
->args
[2];
3035 op
->args
[2] = op
->args
[4];
3036 /* Fall through and mark the single-word operation live. */
3042 case INDEX_op_mulu2_i32
:
3043 opc_new
= INDEX_op_mul_i32
;
3044 opc_new2
= INDEX_op_muluh_i32
;
3045 have_opc_new2
= TCG_TARGET_HAS_muluh_i32
;
3047 case INDEX_op_muls2_i32
:
3048 opc_new
= INDEX_op_mul_i32
;
3049 opc_new2
= INDEX_op_mulsh_i32
;
3050 have_opc_new2
= TCG_TARGET_HAS_mulsh_i32
;
3052 case INDEX_op_mulu2_i64
:
3053 opc_new
= INDEX_op_mul_i64
;
3054 opc_new2
= INDEX_op_muluh_i64
;
3055 have_opc_new2
= TCG_TARGET_HAS_muluh_i64
;
3057 case INDEX_op_muls2_i64
:
3058 opc_new
= INDEX_op_mul_i64
;
3059 opc_new2
= INDEX_op_mulsh_i64
;
3060 have_opc_new2
= TCG_TARGET_HAS_mulsh_i64
;
3065 if (arg_temp(op
->args
[1])->state
== TS_DEAD
) {
3066 if (arg_temp(op
->args
[0])->state
== TS_DEAD
) {
3067 /* Both parts of the operation are dead. */
3070 /* The high part of the operation is dead; generate the low. */
3071 op
->opc
= opc
= opc_new
;
3072 op
->args
[1] = op
->args
[2];
3073 op
->args
[2] = op
->args
[3];
3074 } else if (arg_temp(op
->args
[0])->state
== TS_DEAD
&& have_opc_new2
) {
3075 /* The low part of the operation is dead; generate the high. */
3076 op
->opc
= opc
= opc_new2
;
3077 op
->args
[0] = op
->args
[1];
3078 op
->args
[1] = op
->args
[2];
3079 op
->args
[2] = op
->args
[3];
3083 /* Mark the single-word operation live. */
3088 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3089 nb_iargs
= def
->nb_iargs
;
3090 nb_oargs
= def
->nb_oargs
;
3092 /* Test if the operation can be removed because all
3093 its outputs are dead. We assume that nb_oargs == 0
3094 implies side effects */
3095 if (!(def
->flags
& TCG_OPF_SIDE_EFFECTS
) && nb_oargs
!= 0) {
3096 for (i
= 0; i
< nb_oargs
; i
++) {
3097 if (arg_temp(op
->args
[i
])->state
!= TS_DEAD
) {
3106 tcg_op_remove(s
, op
);
3110 for (i
= 0; i
< nb_oargs
; i
++) {
3111 ts
= arg_temp(op
->args
[i
]);
3113 /* Remember the preference of the uses that followed. */
3114 if (i
< ARRAY_SIZE(op
->output_pref
)) {
3115 op
->output_pref
[i
] = *la_temp_pref(ts
);
3118 /* Output args are dead. */
3119 if (ts
->state
& TS_DEAD
) {
3120 arg_life
|= DEAD_ARG
<< i
;
3122 if (ts
->state
& TS_MEM
) {
3123 arg_life
|= SYNC_ARG
<< i
;
3125 ts
->state
= TS_DEAD
;
3129 /* If end of basic block, update. */
3130 if (def
->flags
& TCG_OPF_BB_EXIT
) {
3131 la_func_end(s
, nb_globals
, nb_temps
);
3132 } else if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3133 la_bb_sync(s
, nb_globals
, nb_temps
);
3134 } else if (def
->flags
& TCG_OPF_BB_END
) {
3135 la_bb_end(s
, nb_globals
, nb_temps
);
3136 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3137 la_global_sync(s
, nb_globals
);
3138 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
3139 la_cross_call(s
, nb_temps
);
3143 /* Record arguments that die in this opcode. */
3144 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3145 ts
= arg_temp(op
->args
[i
]);
3146 if (ts
->state
& TS_DEAD
) {
3147 arg_life
|= DEAD_ARG
<< i
;
3151 /* Input arguments are live for preceding opcodes. */
3152 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3153 ts
= arg_temp(op
->args
[i
]);
3154 if (ts
->state
& TS_DEAD
) {
3155 /* For operands that were dead, initially allow
3156 all regs for the type. */
3157 *la_temp_pref(ts
) = tcg_target_available_regs
[ts
->type
];
3158 ts
->state
&= ~TS_DEAD
;
3162 /* Incorporate constraints for this operand. */
3164 case INDEX_op_mov_i32
:
3165 case INDEX_op_mov_i64
:
3166 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3167 have proper constraints. That said, special case
3168 moves to propagate preferences backward. */
3169 if (IS_DEAD_ARG(1)) {
3170 *la_temp_pref(arg_temp(op
->args
[0]))
3171 = *la_temp_pref(arg_temp(op
->args
[1]));
3176 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
3177 const TCGArgConstraint
*ct
= &def
->args_ct
[i
];
3178 TCGRegSet set
, *pset
;
3180 ts
= arg_temp(op
->args
[i
]);
3181 pset
= la_temp_pref(ts
);
3186 set
&= output_pref(op
, ct
->alias_index
);
3188 /* If the combination is not possible, restart. */
3198 op
->life
= arg_life
;
3202 /* Liveness analysis: Convert indirect regs to direct temporaries. */
3203 static bool liveness_pass_2(TCGContext
*s
)
3205 int nb_globals
= s
->nb_globals
;
3207 bool changes
= false;
3208 TCGOp
*op
, *op_next
;
3210 /* Create a temporary for each indirect global. */
3211 for (i
= 0; i
< nb_globals
; ++i
) {
3212 TCGTemp
*its
= &s
->temps
[i
];
3213 if (its
->indirect_reg
) {
3214 TCGTemp
*dts
= tcg_temp_alloc(s
);
3215 dts
->type
= its
->type
;
3216 dts
->base_type
= its
->base_type
;
3217 dts
->temp_subindex
= its
->temp_subindex
;
3218 dts
->kind
= TEMP_EBB
;
3219 its
->state_ptr
= dts
;
3221 its
->state_ptr
= NULL
;
3223 /* All globals begin dead. */
3224 its
->state
= TS_DEAD
;
3226 for (nb_temps
= s
->nb_temps
; i
< nb_temps
; ++i
) {
3227 TCGTemp
*its
= &s
->temps
[i
];
3228 its
->state_ptr
= NULL
;
3229 its
->state
= TS_DEAD
;
3232 QTAILQ_FOREACH_SAFE(op
, &s
->ops
, link
, op_next
) {
3233 TCGOpcode opc
= op
->opc
;
3234 const TCGOpDef
*def
= &tcg_op_defs
[opc
];
3235 TCGLifeData arg_life
= op
->life
;
3236 int nb_iargs
, nb_oargs
, call_flags
;
3237 TCGTemp
*arg_ts
, *dir_ts
;
3239 if (opc
== INDEX_op_call
) {
3240 nb_oargs
= TCGOP_CALLO(op
);
3241 nb_iargs
= TCGOP_CALLI(op
);
3242 call_flags
= tcg_call_flags(op
);
3244 nb_iargs
= def
->nb_iargs
;
3245 nb_oargs
= def
->nb_oargs
;
3247 /* Set flags similar to how calls require. */
3248 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
3249 /* Like reading globals: sync_globals */
3250 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3251 } else if (def
->flags
& TCG_OPF_BB_END
) {
3252 /* Like writing globals: save_globals */
3254 } else if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
3255 /* Like reading globals: sync_globals */
3256 call_flags
= TCG_CALL_NO_WRITE_GLOBALS
;
3258 /* No effect on globals. */
3259 call_flags
= (TCG_CALL_NO_READ_GLOBALS
|
3260 TCG_CALL_NO_WRITE_GLOBALS
);
3264 /* Make sure that input arguments are available. */
3265 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3266 arg_ts
= arg_temp(op
->args
[i
]);
3267 dir_ts
= arg_ts
->state_ptr
;
3268 if (dir_ts
&& arg_ts
->state
== TS_DEAD
) {
3269 TCGOpcode lopc
= (arg_ts
->type
== TCG_TYPE_I32
3272 TCGOp
*lop
= tcg_op_insert_before(s
, op
, lopc
, 3);
3274 lop
->args
[0] = temp_arg(dir_ts
);
3275 lop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3276 lop
->args
[2] = arg_ts
->mem_offset
;
3278 /* Loaded, but synced with memory. */
3279 arg_ts
->state
= TS_MEM
;
3283 /* Perform input replacement, and mark inputs that became dead.
3284 No action is required except keeping temp_state up to date
3285 so that we reload when needed. */
3286 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
3287 arg_ts
= arg_temp(op
->args
[i
]);
3288 dir_ts
= arg_ts
->state_ptr
;
3290 op
->args
[i
] = temp_arg(dir_ts
);
3292 if (IS_DEAD_ARG(i
)) {
3293 arg_ts
->state
= TS_DEAD
;
3298 /* Liveness analysis should ensure that the following are
3299 all correct, for call sites and basic block end points. */
3300 if (call_flags
& TCG_CALL_NO_READ_GLOBALS
) {
3302 } else if (call_flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
3303 for (i
= 0; i
< nb_globals
; ++i
) {
3304 /* Liveness should see that globals are synced back,
3305 that is, either TS_DEAD or TS_MEM. */
3306 arg_ts
= &s
->temps
[i
];
3307 tcg_debug_assert(arg_ts
->state_ptr
== 0
3308 || arg_ts
->state
!= 0);
3311 for (i
= 0; i
< nb_globals
; ++i
) {
3312 /* Liveness should see that globals are saved back,
3313 that is, TS_DEAD, waiting to be reloaded. */
3314 arg_ts
= &s
->temps
[i
];
3315 tcg_debug_assert(arg_ts
->state_ptr
== 0
3316 || arg_ts
->state
== TS_DEAD
);
3320 /* Outputs become available. */
3321 if (opc
== INDEX_op_mov_i32
|| opc
== INDEX_op_mov_i64
) {
3322 arg_ts
= arg_temp(op
->args
[0]);
3323 dir_ts
= arg_ts
->state_ptr
;
3325 op
->args
[0] = temp_arg(dir_ts
);
3328 /* The output is now live and modified. */
3331 if (NEED_SYNC_ARG(0)) {
3332 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3335 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
, 3);
3336 TCGTemp
*out_ts
= dir_ts
;
3338 if (IS_DEAD_ARG(0)) {
3339 out_ts
= arg_temp(op
->args
[1]);
3340 arg_ts
->state
= TS_DEAD
;
3341 tcg_op_remove(s
, op
);
3343 arg_ts
->state
= TS_MEM
;
3346 sop
->args
[0] = temp_arg(out_ts
);
3347 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3348 sop
->args
[2] = arg_ts
->mem_offset
;
3350 tcg_debug_assert(!IS_DEAD_ARG(0));
3354 for (i
= 0; i
< nb_oargs
; i
++) {
3355 arg_ts
= arg_temp(op
->args
[i
]);
3356 dir_ts
= arg_ts
->state_ptr
;
3360 op
->args
[i
] = temp_arg(dir_ts
);
3363 /* The output is now live and modified. */
3366 /* Sync outputs upon their last write. */
3367 if (NEED_SYNC_ARG(i
)) {
3368 TCGOpcode sopc
= (arg_ts
->type
== TCG_TYPE_I32
3371 TCGOp
*sop
= tcg_op_insert_after(s
, op
, sopc
, 3);
3373 sop
->args
[0] = temp_arg(dir_ts
);
3374 sop
->args
[1] = temp_arg(arg_ts
->mem_base
);
3375 sop
->args
[2] = arg_ts
->mem_offset
;
3377 arg_ts
->state
= TS_MEM
;
3379 /* Drop outputs that are dead. */
3380 if (IS_DEAD_ARG(i
)) {
3381 arg_ts
->state
= TS_DEAD
;
/*
 * Assign a stack-frame slot to @ts (and, for a subdivided base type, to
 * all of its parts).  Raises a TB overflow to restart with a smaller TB
 * when the frame is exhausted.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    /*
     * NOTE(review): the per-type alignment cases were lost in the mangled
     * source and reconstructed from upstream QEMU -- confirm the exact
     * TCG_TYPE_* groupings and alignments in this tree.
     */
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
/* Assign @reg to @ts, and update reg_to_temp[]. */
static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
{
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg old = ts->reg;
        tcg_debug_assert(s->reg_to_temp[old] == ts);
        if (old == reg) {
            return;             /* already in the requested register */
        }
        s->reg_to_temp[old] = NULL;   /* unlink from the previous register */
    }
    tcg_debug_assert(s->reg_to_temp[reg] == NULL);
    s->reg_to_temp[reg] = ts;
    ts->val_type = TEMP_VAL_REG;
    ts->reg = reg;
}
/* Assign a non-register value type to @ts, and update reg_to_temp[]. */
static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
{
    tcg_debug_assert(type != TEMP_VAL_REG);
    if (ts->val_type == TEMP_VAL_REG) {
        TCGReg reg = ts->reg;
        tcg_debug_assert(s->reg_to_temp[reg] == ts);
        s->reg_to_temp[reg] = NULL;   /* the register no longer holds @ts */
    }
    ts->val_type = type;
}
3488 static void temp_load(TCGContext
*, TCGTemp
*, TCGRegSet
, TCGRegSet
, TCGRegSet
);
/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    TCGTempVal new_type;

    /*
     * NOTE(review): only the per-case assignments survived in the mangled
     * source; the case labels were reconstructed from upstream QEMU --
     * confirm against this tree's TCGTempKind.
     */
    switch (ts->kind) {
    case TEMP_FIXED:
        return;                 /* fixed temps are never released */
    case TEMP_GLOBAL:
    case TEMP_TB:
        /* Canonical home is memory. */
        new_type = TEMP_VAL_MEM;
        break;
    case TEMP_EBB:
        new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
        break;
    case TEMP_CONST:
        new_type = TEMP_VAL_CONST;
        break;
    default:
        g_assert_not_reached();
    }
    set_temp_val_nonreg(s, ts, new_type);
}
/* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead => dead (not free). */
    temp_free_or_dead(s, ts, 1);
}
/* Sync a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   If 'free_or_dead' is non-zero, subsequently release the temporary;
   if it is positive, the temp is dead; if it is negative, the temp is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        /*
         * NOTE(review): the switch scaffolding was lost in the mangled
         * source and reconstructed from upstream QEMU.
         */
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register first. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;   /* memory copy is now up to date */
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        /* Sync to memory and release the register (free_or_dead < 0). */
        temp_sync(s, ts, allocated_regs, 0, -1);
    }
}
/*
 * Allocate one register.
 * @required_regs: Set of registers in which we must allocate.
 * @allocated_regs: Set of registers which must be avoided.
 * @preferred_regs: Set of registers we should prefer.
 * @rev: True if we search the registers in "indirect" order.
 *
 * The allocated register must be in @required_regs & ~@allocated_regs,
 * but if we can put it in @preferred_regs we may save a move later.
 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
{
    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* reg_ct[1] = acceptable, reg_ct[0] = acceptable AND preferred. */
    reg_ct[1] = required_regs & ~allocated_regs;
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    /* Skip the preferred_regs option if it cannot be satisfied,
       or if the preference made no difference. */
    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* Try free registers, preferences first. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            if (s->reg_to_temp[reg] == NULL) {
                return reg;
            }
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (s->reg_to_temp[reg] == NULL &&
                    tcg_regset_test_reg(set, reg)) {
                    return reg;
                }
            }
        }
    }

    /* We must spill something. */
    for (j = f; j < 2; j++) {
        TCGRegSet set = reg_ct[j];

        if (tcg_regset_single(set)) {
            /* One register in the set. */
            TCGReg reg = tcg_regset_first(set);
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        } else {
            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];
                if (tcg_regset_test_reg(set, reg)) {
                    tcg_reg_free(s, reg, allocated_regs);
                    return reg;
                }
            }
        }
    }

    /* reg_ct[1] was asserted non-empty, so one of the loops returned. */
    g_assert_not_reached();
}
/*
 * Allocate an aligned, even/odd pair of registers (reg, reg + 1),
 * with the same contract as tcg_reg_alloc().  Returns the low register.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of free registers in the pair. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];

                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    /* fmin == 0 accepts any pair in a non-empty set, so we returned above. */
    g_assert_not_reached();
}
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    /*
     * NOTE(review): the TEMP_VAL_REG/TEMP_VAL_MEM case scaffolding and
     * the vector-element search variable were lost in the mangled source
     * and reconstructed from upstream QEMU.
     */
    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;                 /* already resident */

    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* Register holds the value; memory (if any) is stale. */
        ts->mem_coherent = 0;
        break;

    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;   /* register and memory now agree */
        break;

    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}
/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i, n;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Liveness should already have forced the sync; assert only. */
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->kind == TEMP_FIXED
                         || ts->mem_coherent);
    }
}
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        /*
         * NOTE(review): case labels reconstructed from upstream QEMU;
         * confirm against this tree's TCGTempKind.
         */
        switch (ts->kind) {
        case TEMP_TB:
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep an tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}
/*
 * At a conditional branch, we assume all temporaries are dead unless
 * explicitly live-across-conditional-branch; all globals and local
 * temps are synced to their location.
 */
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
{
    sync_globals(s, allocated_regs);

    for (int i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        /*
         * The liveness analysis already ensures that temps are dead.
         * Keep tcg_debug_asserts for safety.
         *
         * NOTE(review): case labels reconstructed from upstream QEMU;
         * confirm against this tree's TCGTempKind.
         */
        switch (ts->kind) {
        case TEMP_TB:
            tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
            break;
        case TEMP_EBB:
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            break;
        default:
            g_assert_not_reached();
        }
    }
}
/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here.  */
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
    ots->val = val;
    ots->mem_coherent = 0;      /* constant not yet written to memory */
    if (NEED_SYNC_ARG(0)) {
        /* Sync now; this also releases the temp if arg 0 is dead. */
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3862 * Specialized code generation for INDEX_op_mov_*.
3864 static void tcg_reg_alloc_mov(TCGContext
*s
, const TCGOp
*op
)
3866 const TCGLifeData arg_life
= op
->life
;
3867 TCGRegSet allocated_regs
, preferred_regs
;
3869 TCGType otype
, itype
;
3872 allocated_regs
= s
->reserved_regs
;
3873 preferred_regs
= output_pref(op
, 0);
3874 ots
= arg_temp(op
->args
[0]);
3875 ts
= arg_temp(op
->args
[1]);
3877 /* ENV should not be modified. */
3878 tcg_debug_assert(!temp_readonly(ots
));
3880 /* Note that otype != itype for no-op truncation. */
3884 if (ts
->val_type
== TEMP_VAL_CONST
) {
3885 /* propagate constant or generate sti */
3886 tcg_target_ulong val
= ts
->val
;
3887 if (IS_DEAD_ARG(1)) {
3890 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, preferred_regs
);
3894 /* If the source value is in memory we're going to be forced
3895 to have it in a register in order to perform the copy. Copy
3896 the SOURCE value into its own register first, that way we
3897 don't have to reload SOURCE the next time it is used. */
3898 if (ts
->val_type
== TEMP_VAL_MEM
) {
3899 temp_load(s
, ts
, tcg_target_available_regs
[itype
],
3900 allocated_regs
, preferred_regs
);
3902 tcg_debug_assert(ts
->val_type
== TEMP_VAL_REG
);
3905 if (IS_DEAD_ARG(0)) {
3906 /* mov to a non-saved dead register makes no sense (even with
3907 liveness analysis disabled). */
3908 tcg_debug_assert(NEED_SYNC_ARG(0));
3909 if (!ots
->mem_allocated
) {
3910 temp_allocate_frame(s
, ots
);
3912 tcg_out_st(s
, otype
, ireg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3913 if (IS_DEAD_ARG(1)) {
3920 if (IS_DEAD_ARG(1) && ts
->kind
!= TEMP_FIXED
) {
3922 * The mov can be suppressed. Kill input first, so that it
3923 * is unlinked from reg_to_temp, then set the output to the
3924 * reg that we saved from the input.
3929 if (ots
->val_type
== TEMP_VAL_REG
) {
3932 /* Make sure to not spill the input register during allocation. */
3933 oreg
= tcg_reg_alloc(s
, tcg_target_available_regs
[otype
],
3934 allocated_regs
| ((TCGRegSet
)1 << ireg
),
3935 preferred_regs
, ots
->indirect_base
);
3937 if (!tcg_out_mov(s
, otype
, oreg
, ireg
)) {
3939 * Cross register class move not supported.
3940 * Store the source register into the destination slot
3941 * and leave the destination temp as TEMP_VAL_MEM.
3943 assert(!temp_readonly(ots
));
3944 if (!ts
->mem_allocated
) {
3945 temp_allocate_frame(s
, ots
);
3947 tcg_out_st(s
, ts
->type
, ireg
, ots
->mem_base
->reg
, ots
->mem_offset
);
3948 set_temp_val_nonreg(s
, ts
, TEMP_VAL_MEM
);
3949 ots
->mem_coherent
= 1;
3953 set_temp_val_reg(s
, ots
, oreg
);
3954 ots
->mem_coherent
= 0;
3956 if (NEED_SYNC_ARG(0)) {
3957 temp_sync(s
, ots
, allocated_regs
, 0, 0);
3962 * Specialized code generation for INDEX_op_dup_vec.
3964 static void tcg_reg_alloc_dup(TCGContext
*s
, const TCGOp
*op
)
3966 const TCGLifeData arg_life
= op
->life
;
3967 TCGRegSet dup_out_regs
, dup_in_regs
;
3969 TCGType itype
, vtype
;
3974 ots
= arg_temp(op
->args
[0]);
3975 its
= arg_temp(op
->args
[1]);
3977 /* ENV should not be modified. */
3978 tcg_debug_assert(!temp_readonly(ots
));
3981 vece
= TCGOP_VECE(op
);
3982 vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
3984 if (its
->val_type
== TEMP_VAL_CONST
) {
3985 /* Propagate constant via movi -> dupi. */
3986 tcg_target_ulong val
= its
->val
;
3987 if (IS_DEAD_ARG(1)) {
3990 tcg_reg_alloc_do_movi(s
, ots
, val
, arg_life
, output_pref(op
, 0));
3994 dup_out_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
3995 dup_in_regs
= tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[1].regs
;
3997 /* Allocate the output register now. */
3998 if (ots
->val_type
!= TEMP_VAL_REG
) {
3999 TCGRegSet allocated_regs
= s
->reserved_regs
;
4002 if (!IS_DEAD_ARG(1) && its
->val_type
== TEMP_VAL_REG
) {
4003 /* Make sure to not spill the input register. */
4004 tcg_regset_set_reg(allocated_regs
, its
->reg
);
4006 oreg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
4007 output_pref(op
, 0), ots
->indirect_base
);
4008 set_temp_val_reg(s
, ots
, oreg
);
4011 switch (its
->val_type
) {
4014 * The dup constriaints must be broad, covering all possible VECE.
4015 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4016 * to fail, indicating that extra moves are required for that case.
4018 if (tcg_regset_test_reg(dup_in_regs
, its
->reg
)) {
4019 if (tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, its
->reg
)) {
4022 /* Try again from memory or a vector input register. */
4024 if (!its
->mem_coherent
) {
4026 * The input register is not synced, and so an extra store
4027 * would be required to use memory. Attempt an integer-vector
4028 * register move first. We do not have a TCGRegSet for this.
4030 if (tcg_out_mov(s
, itype
, ots
->reg
, its
->reg
)) {
4033 /* Sync the temp back to its slot and load from there. */
4034 temp_sync(s
, its
, s
->reserved_regs
, 0, 0);
4040 if (HOST_BIG_ENDIAN
) {
4041 lowpart_ofs
= tcg_type_size(itype
) - (1 << vece
);
4043 if (tcg_out_dupm_vec(s
, vtype
, vece
, ots
->reg
, its
->mem_base
->reg
,
4044 its
->mem_offset
+ lowpart_ofs
)) {
4047 /* Load the input into the destination vector register. */
4048 tcg_out_ld(s
, itype
, ots
->reg
, its
->mem_base
->reg
, its
->mem_offset
);
4052 g_assert_not_reached();
4055 /* We now have a vector input register, so dup must succeed. */
4056 ok
= tcg_out_dup_vec(s
, vtype
, vece
, ots
->reg
, ots
->reg
);
4057 tcg_debug_assert(ok
);
4060 ots
->mem_coherent
= 0;
4061 if (IS_DEAD_ARG(1)) {
4064 if (NEED_SYNC_ARG(0)) {
4065 temp_sync(s
, ots
, s
->reserved_regs
, 0, 0);
4067 if (IS_DEAD_ARG(0)) {
4072 static void tcg_reg_alloc_op(TCGContext
*s
, const TCGOp
*op
)
4074 const TCGLifeData arg_life
= op
->life
;
4075 const TCGOpDef
* const def
= &tcg_op_defs
[op
->opc
];
4076 TCGRegSet i_allocated_regs
;
4077 TCGRegSet o_allocated_regs
;
4078 int i
, k
, nb_iargs
, nb_oargs
;
4081 const TCGArgConstraint
*arg_ct
;
4083 TCGArg new_args
[TCG_MAX_OP_ARGS
];
4084 int const_args
[TCG_MAX_OP_ARGS
];
4086 nb_oargs
= def
->nb_oargs
;
4087 nb_iargs
= def
->nb_iargs
;
4089 /* copy constants */
4090 memcpy(new_args
+ nb_oargs
+ nb_iargs
,
4091 op
->args
+ nb_oargs
+ nb_iargs
,
4092 sizeof(TCGArg
) * def
->nb_cargs
);
4094 i_allocated_regs
= s
->reserved_regs
;
4095 o_allocated_regs
= s
->reserved_regs
;
4097 /* satisfy input constraints */
4098 for (k
= 0; k
< nb_iargs
; k
++) {
4099 TCGRegSet i_preferred_regs
, i_required_regs
;
4100 bool allocate_new_reg
, copyto_new_reg
;
4104 i
= def
->args_ct
[nb_oargs
+ k
].sort_index
;
4106 arg_ct
= &def
->args_ct
[i
];
4109 if (ts
->val_type
== TEMP_VAL_CONST
4110 && tcg_target_const_match(ts
->val
, ts
->type
, arg_ct
->ct
)) {
4111 /* constant is OK for instruction */
4113 new_args
[i
] = ts
->val
;
4118 i_preferred_regs
= 0;
4119 i_required_regs
= arg_ct
->regs
;
4120 allocate_new_reg
= false;
4121 copyto_new_reg
= false;
4123 switch (arg_ct
->pair
) {
4124 case 0: /* not paired */
4125 if (arg_ct
->ialias
) {
4126 i_preferred_regs
= output_pref(op
, arg_ct
->alias_index
);
4129 * If the input is readonly, then it cannot also be an
4130 * output and aliased to itself. If the input is not
4131 * dead after the instruction, we must allocate a new
4132 * register and move it.
4134 if (temp_readonly(ts
) || !IS_DEAD_ARG(i
)) {
4135 allocate_new_reg
= true;
4136 } else if (ts
->val_type
== TEMP_VAL_REG
) {
4138 * Check if the current register has already been
4139 * allocated for another input.
4142 tcg_regset_test_reg(i_allocated_regs
, reg
);
4145 if (!allocate_new_reg
) {
4146 temp_load(s
, ts
, i_required_regs
, i_allocated_regs
,
4149 allocate_new_reg
= !tcg_regset_test_reg(i_required_regs
, reg
);
4151 if (allocate_new_reg
) {
4153 * Allocate a new register matching the constraint
4154 * and move the temporary register into it.
4156 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4157 i_allocated_regs
, 0);
4158 reg
= tcg_reg_alloc(s
, i_required_regs
, i_allocated_regs
,
4159 i_preferred_regs
, ts
->indirect_base
);
4160 copyto_new_reg
= true;
4165 /* First of an input pair; if i1 == i2, the second is an output. */
4167 i2
= arg_ct
->pair_index
;
4168 ts2
= i1
!= i2
? arg_temp(op
->args
[i2
]) : NULL
;
4171 * It is easier to default to allocating a new pair
4172 * and to identify a few cases where it's not required.
4174 if (arg_ct
->ialias
) {
4175 i_preferred_regs
= output_pref(op
, arg_ct
->alias_index
);
4176 if (IS_DEAD_ARG(i1
) &&
4178 !temp_readonly(ts
) &&
4179 ts
->val_type
== TEMP_VAL_REG
&&
4180 ts
->reg
< TCG_TARGET_NB_REGS
- 1 &&
4181 tcg_regset_test_reg(i_required_regs
, reg
) &&
4182 !tcg_regset_test_reg(i_allocated_regs
, reg
) &&
4183 !tcg_regset_test_reg(i_allocated_regs
, reg
+ 1) &&
4185 ? ts2
->val_type
== TEMP_VAL_REG
&&
4186 ts2
->reg
== reg
+ 1 &&
4188 : s
->reg_to_temp
[reg
+ 1] == NULL
)) {
4192 /* Without aliasing, the pair must also be an input. */
4193 tcg_debug_assert(ts2
);
4194 if (ts
->val_type
== TEMP_VAL_REG
&&
4195 ts2
->val_type
== TEMP_VAL_REG
&&
4196 ts2
->reg
== reg
+ 1 &&
4197 tcg_regset_test_reg(i_required_regs
, reg
)) {
4201 reg
= tcg_reg_alloc_pair(s
, i_required_regs
, i_allocated_regs
,
4202 0, ts
->indirect_base
);
4205 case 2: /* pair second */
4206 reg
= new_args
[arg_ct
->pair_index
] + 1;
4209 case 3: /* ialias with second output, no first input */
4210 tcg_debug_assert(arg_ct
->ialias
);
4211 i_preferred_regs
= output_pref(op
, arg_ct
->alias_index
);
4213 if (IS_DEAD_ARG(i
) &&
4214 !temp_readonly(ts
) &&
4215 ts
->val_type
== TEMP_VAL_REG
&&
4217 s
->reg_to_temp
[reg
- 1] == NULL
&&
4218 tcg_regset_test_reg(i_required_regs
, reg
) &&
4219 !tcg_regset_test_reg(i_allocated_regs
, reg
) &&
4220 !tcg_regset_test_reg(i_allocated_regs
, reg
- 1)) {
4221 tcg_regset_set_reg(i_allocated_regs
, reg
- 1);
4224 reg
= tcg_reg_alloc_pair(s
, i_required_regs
>> 1,
4225 i_allocated_regs
, 0,
4227 tcg_regset_set_reg(i_allocated_regs
, reg
);
4233 * If an aliased input is not dead after the instruction,
4234 * we must allocate a new register and move it.
4236 if (arg_ct
->ialias
&& (!IS_DEAD_ARG(i
) || temp_readonly(ts
))) {
4237 TCGRegSet t_allocated_regs
= i_allocated_regs
;
4240 * Because of the alias, and the continued life, make sure
4241 * that the temp is somewhere *other* than the reg pair,
4242 * and we get a copy in reg.
4244 tcg_regset_set_reg(t_allocated_regs
, reg
);
4245 tcg_regset_set_reg(t_allocated_regs
, reg
+ 1);
4246 if (ts
->val_type
== TEMP_VAL_REG
&& ts
->reg
== reg
) {
4247 /* If ts was already in reg, copy it somewhere else. */
4251 tcg_debug_assert(ts
->kind
!= TEMP_FIXED
);
4252 nr
= tcg_reg_alloc(s
, tcg_target_available_regs
[ts
->type
],
4253 t_allocated_regs
, 0, ts
->indirect_base
);
4254 ok
= tcg_out_mov(s
, ts
->type
, nr
, reg
);
4255 tcg_debug_assert(ok
);
4257 set_temp_val_reg(s
, ts
, nr
);
4259 temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
],
4260 t_allocated_regs
, 0);
4261 copyto_new_reg
= true;
4264 /* Preferably allocate to reg, otherwise copy. */
4265 i_required_regs
= (TCGRegSet
)1 << reg
;
4266 temp_load(s
, ts
, i_required_regs
, i_allocated_regs
,
4268 copyto_new_reg
= ts
->reg
!= reg
;
4273 g_assert_not_reached();
4276 if (copyto_new_reg
) {
4277 if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4279 * Cross register class move not supported. Sync the
4280 * temp back to its slot and load from there.
4282 temp_sync(s
, ts
, i_allocated_regs
, 0, 0);
4283 tcg_out_ld(s
, ts
->type
, reg
,
4284 ts
->mem_base
->reg
, ts
->mem_offset
);
4289 tcg_regset_set_reg(i_allocated_regs
, reg
);
4292 /* mark dead temporaries and free the associated registers */
4293 for (i
= nb_oargs
; i
< nb_oargs
+ nb_iargs
; i
++) {
4294 if (IS_DEAD_ARG(i
)) {
4295 temp_dead(s
, arg_temp(op
->args
[i
]));
4299 if (def
->flags
& TCG_OPF_COND_BRANCH
) {
4300 tcg_reg_alloc_cbranch(s
, i_allocated_regs
);
4301 } else if (def
->flags
& TCG_OPF_BB_END
) {
4302 tcg_reg_alloc_bb_end(s
, i_allocated_regs
);
4304 if (def
->flags
& TCG_OPF_CALL_CLOBBER
) {
4305 /* XXX: permit generic clobber register list ? */
4306 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4307 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4308 tcg_reg_free(s
, i
, i_allocated_regs
);
4312 if (def
->flags
& TCG_OPF_SIDE_EFFECTS
) {
4313 /* sync globals if the op has side effects and might trigger
4315 sync_globals(s
, i_allocated_regs
);
4318 /* satisfy the output constraints */
4319 for(k
= 0; k
< nb_oargs
; k
++) {
4320 i
= def
->args_ct
[k
].sort_index
;
4322 arg_ct
= &def
->args_ct
[i
];
4325 /* ENV should not be modified. */
4326 tcg_debug_assert(!temp_readonly(ts
));
4328 switch (arg_ct
->pair
) {
4329 case 0: /* not paired */
4330 if (arg_ct
->oalias
&& !const_args
[arg_ct
->alias_index
]) {
4331 reg
= new_args
[arg_ct
->alias_index
];
4332 } else if (arg_ct
->newreg
) {
4333 reg
= tcg_reg_alloc(s
, arg_ct
->regs
,
4334 i_allocated_regs
| o_allocated_regs
,
4335 output_pref(op
, k
), ts
->indirect_base
);
4337 reg
= tcg_reg_alloc(s
, arg_ct
->regs
, o_allocated_regs
,
4338 output_pref(op
, k
), ts
->indirect_base
);
4342 case 1: /* first of pair */
4343 tcg_debug_assert(!arg_ct
->newreg
);
4344 if (arg_ct
->oalias
) {
4345 reg
= new_args
[arg_ct
->alias_index
];
4348 reg
= tcg_reg_alloc_pair(s
, arg_ct
->regs
, o_allocated_regs
,
4349 output_pref(op
, k
), ts
->indirect_base
);
4352 case 2: /* second of pair */
4353 tcg_debug_assert(!arg_ct
->newreg
);
4354 if (arg_ct
->oalias
) {
4355 reg
= new_args
[arg_ct
->alias_index
];
4357 reg
= new_args
[arg_ct
->pair_index
] + 1;
4361 case 3: /* first of pair, aliasing with a second input */
4362 tcg_debug_assert(!arg_ct
->newreg
);
4363 reg
= new_args
[arg_ct
->pair_index
] - 1;
4367 g_assert_not_reached();
4369 tcg_regset_set_reg(o_allocated_regs
, reg
);
4370 set_temp_val_reg(s
, ts
, reg
);
4371 ts
->mem_coherent
= 0;
4376 /* emit instruction */
4377 if (def
->flags
& TCG_OPF_VECTOR
) {
4378 tcg_out_vec_op(s
, op
->opc
, TCGOP_VECL(op
), TCGOP_VECE(op
),
4379 new_args
, const_args
);
4381 tcg_out_op(s
, op
->opc
, new_args
, const_args
);
4384 /* move the outputs in the correct register if needed */
4385 for(i
= 0; i
< nb_oargs
; i
++) {
4386 ts
= arg_temp(op
->args
[i
]);
4388 /* ENV should not be modified. */
4389 tcg_debug_assert(!temp_readonly(ts
));
4391 if (NEED_SYNC_ARG(i
)) {
4392 temp_sync(s
, ts
, o_allocated_regs
, 0, IS_DEAD_ARG(i
));
4393 } else if (IS_DEAD_ARG(i
)) {
4399 static bool tcg_reg_alloc_dup2(TCGContext
*s
, const TCGOp
*op
)
4401 const TCGLifeData arg_life
= op
->life
;
4402 TCGTemp
*ots
, *itsl
, *itsh
;
4403 TCGType vtype
= TCGOP_VECL(op
) + TCG_TYPE_V64
;
4405 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4406 tcg_debug_assert(TCG_TARGET_REG_BITS
== 32);
4407 tcg_debug_assert(TCGOP_VECE(op
) == MO_64
);
4409 ots
= arg_temp(op
->args
[0]);
4410 itsl
= arg_temp(op
->args
[1]);
4411 itsh
= arg_temp(op
->args
[2]);
4413 /* ENV should not be modified. */
4414 tcg_debug_assert(!temp_readonly(ots
));
4416 /* Allocate the output register now. */
4417 if (ots
->val_type
!= TEMP_VAL_REG
) {
4418 TCGRegSet allocated_regs
= s
->reserved_regs
;
4419 TCGRegSet dup_out_regs
=
4420 tcg_op_defs
[INDEX_op_dup_vec
].args_ct
[0].regs
;
4423 /* Make sure to not spill the input registers. */
4424 if (!IS_DEAD_ARG(1) && itsl
->val_type
== TEMP_VAL_REG
) {
4425 tcg_regset_set_reg(allocated_regs
, itsl
->reg
);
4427 if (!IS_DEAD_ARG(2) && itsh
->val_type
== TEMP_VAL_REG
) {
4428 tcg_regset_set_reg(allocated_regs
, itsh
->reg
);
4431 oreg
= tcg_reg_alloc(s
, dup_out_regs
, allocated_regs
,
4432 output_pref(op
, 0), ots
->indirect_base
);
4433 set_temp_val_reg(s
, ots
, oreg
);
4436 /* Promote dup2 of immediates to dupi_vec. */
4437 if (itsl
->val_type
== TEMP_VAL_CONST
&& itsh
->val_type
== TEMP_VAL_CONST
) {
4438 uint64_t val
= deposit64(itsl
->val
, 32, 32, itsh
->val
);
4441 if (val
== dup_const(MO_8
, val
)) {
4443 } else if (val
== dup_const(MO_16
, val
)) {
4445 } else if (val
== dup_const(MO_32
, val
)) {
4449 tcg_out_dupi_vec(s
, vtype
, vece
, ots
->reg
, val
);
4453 /* If the two inputs form one 64-bit value, try dupm_vec. */
4454 if (itsl
->temp_subindex
== HOST_BIG_ENDIAN
&&
4455 itsh
->temp_subindex
== !HOST_BIG_ENDIAN
&&
4456 itsl
== itsh
+ (HOST_BIG_ENDIAN
? 1 : -1)) {
4457 TCGTemp
*its
= itsl
- HOST_BIG_ENDIAN
;
4459 temp_sync(s
, its
+ 0, s
->reserved_regs
, 0, 0);
4460 temp_sync(s
, its
+ 1, s
->reserved_regs
, 0, 0);
4462 if (tcg_out_dupm_vec(s
, vtype
, MO_64
, ots
->reg
,
4463 its
->mem_base
->reg
, its
->mem_offset
)) {
4468 /* Fall back to generic expansion. */
4472 ots
->mem_coherent
= 0;
4473 if (IS_DEAD_ARG(1)) {
4476 if (IS_DEAD_ARG(2)) {
4479 if (NEED_SYNC_ARG(0)) {
4480 temp_sync(s
, ots
, s
->reserved_regs
, 0, IS_DEAD_ARG(0));
4481 } else if (IS_DEAD_ARG(0)) {
/*
 * Place call argument @ts into the specific host register @reg.
 * If @ts already lives in some other register, @reg is freed and the
 * value is moved; if a cross-class move is unsupported, the temp is
 * synced to its memory slot and reloaded from there.  Otherwise the
 * temp is loaded directly into @reg via temp_load() with a one-bit
 * register set.
 * NOTE(review): extraction gaps — braces and an "else" line from the
 * original are missing in this chunk; code text left untouched.
 */
4487 static void load_arg_reg(TCGContext
*s
, TCGReg reg
, TCGTemp
*ts
,
4488                          TCGRegSet allocated_regs
)
4490     if (ts
->val_type
 == TEMP_VAL_REG
) {
4491         if (ts
->reg
 != reg
) {
/* Evict whatever currently occupies the target register. */
4492             tcg_reg_free(s
, reg
, allocated_regs
);
4493             if (!tcg_out_mov(s
, ts
->type
, reg
, ts
->reg
)) {
4495                  * Cross register class move not supported.  Sync the
4496                  * temp back to its slot and load from there.
4498                 temp_sync(s
, ts
, allocated_regs
, 0, 0);
4499                 tcg_out_ld(s
, ts
->type
, reg
,
4500                            ts
->mem_base
->reg
, ts
->mem_offset
);
/* Temp is not currently in a register: load it straight into @reg. */
4504         TCGRegSet arg_set
 = 0;
4506         tcg_reg_free(s
, reg
, allocated_regs
);
4507         tcg_regset_set_reg(arg_set
, reg
);
4508         temp_load(s
, ts
, arg_set
, allocated_regs
, 0);
/*
 * Store call argument @ts into stack slot @stk_slot, relative to
 * TCG_REG_CALL_STACK + TCG_TARGET_CALL_STACK_OFFSET.  The temp is
 * first materialized in any available register, then stored.
 */
4512 static void load_arg_stk(TCGContext
*s
, int stk_slot
, TCGTemp
*ts
,
4513                          TCGRegSet allocated_regs
)
4516      * When the destination is on the stack, load up the temp and store.
4517      * If there are many call-saved registers, the temp might live to
4518      * see another use; otherwise it'll be discarded.
4520     temp_load(s
, ts
, tcg_target_available_regs
[ts
->type
], allocated_regs
, 0);
/* Slot offset is in units of the host register size. */
4521     tcg_out_st(s
, ts
->type
, ts
->reg
, TCG_REG_CALL_STACK
,
4522                TCG_TARGET_CALL_STACK_OFFSET
 +
4523                stk_slot
 * sizeof(tcg_target_long
));
/*
 * Place a "normal" call argument described by @l: either into the
 * corresponding integer argument register (marking it allocated in
 * *@allocated_regs) or, when the slot is beyond the register args,
 * into the appropriate stack slot.
 * NOTE(review): the branch condition choosing register vs stack
 * (original line 4529) is missing from this extraction — presumably
 * a test of l->arg_slot against ARRAY_SIZE(tcg_target_call_iarg_regs);
 * confirm against upstream before editing.
 */
4526 static void load_arg_normal(TCGContext
*s
, const TCGCallArgumentLoc
*l
,
4527                             TCGTemp
*ts
, TCGRegSet
*allocated_regs
)
4530         TCGReg reg
 = tcg_target_call_iarg_regs
[l
->arg_slot
];
4531         load_arg_reg(s
, reg
, ts
, *allocated_regs
);
/* Reserve the argument register so later args don't clobber it. */
4532         tcg_regset_set_reg(*allocated_regs
, reg
);
/* Stack case: slot index is relative to the end of the register args. */
4534         load_arg_stk(s
, l
->arg_slot
 - ARRAY_SIZE(tcg_target_call_iarg_regs
),
4535                      ts
, *allocated_regs
);
/*
 * Pass a pointer (ref_base + ref_off) as a call argument in slot
 * @arg_slot.  Register slots get the address computed directly into
 * the argument register; stack slots get the address built in a
 * scratch register and stored to the stack.
 * NOTE(review): the register/stack branch condition is missing from
 * this extraction; the stk_slot computation implies it tests against
 * ARRAY_SIZE(tcg_target_call_iarg_regs) — confirm upstream.
 */
4539 static void load_arg_ref(TCGContext
*s
, int arg_slot
, TCGReg ref_base
,
4540                          intptr_t ref_off
, TCGRegSet
*allocated_regs
)
4543     int stk_slot
 = arg_slot
 - ARRAY_SIZE(tcg_target_call_iarg_regs
);
/* Register argument: free it, then build the address in place. */
4546         reg
 = tcg_target_call_iarg_regs
[arg_slot
];
4547         tcg_reg_free(s
, reg
, *allocated_regs
);
4548         tcg_out_addi_ptr(s
, reg
, ref_base
, ref_off
);
4549         tcg_regset_set_reg(*allocated_regs
, reg
);
/* Stack argument: compute the address in a scratch pointer register. */
4551         reg
 = tcg_reg_alloc(s
, tcg_target_available_regs
[TCG_TYPE_PTR
],
4552                             *allocated_regs
, 0, false);
4553         tcg_out_addi_ptr(s
, reg
, ref_base
, ref_off
);
4554         tcg_out_st(s
, TCG_TYPE_PTR
, reg
, TCG_REG_CALL_STACK
,
4555                    TCG_TARGET_CALL_STACK_OFFSET
4556                    + stk_slot
 * sizeof(tcg_target_long
));
4560 static void tcg_reg_alloc_call(TCGContext
*s
, TCGOp
*op
)
4562 const int nb_oargs
= TCGOP_CALLO(op
);
4563 const int nb_iargs
= TCGOP_CALLI(op
);
4564 const TCGLifeData arg_life
= op
->life
;
4565 const TCGHelperInfo
*info
= tcg_call_info(op
);
4566 TCGRegSet allocated_regs
= s
->reserved_regs
;
4570 * Move inputs into place in reverse order,
4571 * so that we place stacked arguments first.
4573 for (i
= nb_iargs
- 1; i
>= 0; --i
) {
4574 const TCGCallArgumentLoc
*loc
= &info
->in
[i
];
4575 TCGTemp
*ts
= arg_temp(op
->args
[nb_oargs
+ i
]);
4577 switch (loc
->kind
) {
4578 case TCG_CALL_ARG_NORMAL
:
4579 case TCG_CALL_ARG_EXTEND_U
:
4580 case TCG_CALL_ARG_EXTEND_S
:
4581 load_arg_normal(s
, loc
, ts
, &allocated_regs
);
4583 case TCG_CALL_ARG_BY_REF
:
4584 load_arg_stk(s
, loc
->ref_slot
, ts
, allocated_regs
);
4585 load_arg_ref(s
, loc
->arg_slot
, TCG_REG_CALL_STACK
,
4586 TCG_TARGET_CALL_STACK_OFFSET
4587 + loc
->ref_slot
* sizeof(tcg_target_long
),
4590 case TCG_CALL_ARG_BY_REF_N
:
4591 load_arg_stk(s
, loc
->ref_slot
, ts
, allocated_regs
);
4594 g_assert_not_reached();
4598 /* Mark dead temporaries and free the associated registers. */
4599 for (i
= nb_oargs
; i
< nb_iargs
+ nb_oargs
; i
++) {
4600 if (IS_DEAD_ARG(i
)) {
4601 temp_dead(s
, arg_temp(op
->args
[i
]));
4605 /* Clobber call registers. */
4606 for (i
= 0; i
< TCG_TARGET_NB_REGS
; i
++) {
4607 if (tcg_regset_test_reg(tcg_target_call_clobber_regs
, i
)) {
4608 tcg_reg_free(s
, i
, allocated_regs
);
4613 * Save globals if they might be written by the helper,
4614 * sync them if they might be read.
4616 if (info
->flags
& TCG_CALL_NO_READ_GLOBALS
) {
4618 } else if (info
->flags
& TCG_CALL_NO_WRITE_GLOBALS
) {
4619 sync_globals(s
, allocated_regs
);
4621 save_globals(s
, allocated_regs
);
4625 * If the ABI passes a pointer to the returned struct as the first
4626 * argument, load that now. Pass a pointer to the output home slot.
4628 if (info
->out_kind
== TCG_CALL_RET_BY_REF
) {
4629 TCGTemp
*ts
= arg_temp(op
->args
[0]);
4631 if (!ts
->mem_allocated
) {
4632 temp_allocate_frame(s
, ts
);
4634 load_arg_ref(s
, 0, ts
->mem_base
->reg
, ts
->mem_offset
, &allocated_regs
);
4637 tcg_out_call(s
, tcg_call_func(op
), info
);
4639 /* Assign output registers and emit moves if needed. */
4640 switch (info
->out_kind
) {
4641 case TCG_CALL_RET_NORMAL
:
4642 for (i
= 0; i
< nb_oargs
; i
++) {
4643 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
4644 TCGReg reg
= tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL
, i
);
4646 /* ENV should not be modified. */
4647 tcg_debug_assert(!temp_readonly(ts
));
4649 set_temp_val_reg(s
, ts
, reg
);
4650 ts
->mem_coherent
= 0;
4654 case TCG_CALL_RET_BY_VEC
:
4656 TCGTemp
*ts
= arg_temp(op
->args
[0]);
4658 tcg_debug_assert(ts
->base_type
== TCG_TYPE_I128
);
4659 tcg_debug_assert(ts
->temp_subindex
== 0);
4660 if (!ts
->mem_allocated
) {
4661 temp_allocate_frame(s
, ts
);
4663 tcg_out_st(s
, TCG_TYPE_V128
,
4664 tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC
, 0),
4665 ts
->mem_base
->reg
, ts
->mem_offset
);
4667 /* fall through to mark all parts in memory */
4669 case TCG_CALL_RET_BY_REF
:
4670 /* The callee has performed a write through the reference. */
4671 for (i
= 0; i
< nb_oargs
; i
++) {
4672 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
4673 ts
->val_type
= TEMP_VAL_MEM
;
4678 g_assert_not_reached();
4681 /* Flush or discard output registers as needed. */
4682 for (i
= 0; i
< nb_oargs
; i
++) {
4683 TCGTemp
*ts
= arg_temp(op
->args
[i
]);
4684 if (NEED_SYNC_ARG(i
)) {
4685 temp_sync(s
, ts
, s
->reserved_regs
, 0, IS_DEAD_ARG(i
));
4686 } else if (IS_DEAD_ARG(i
)) {
4692 #ifdef CONFIG_PROFILER
4694 /* avoid copy/paste errors */
4695 #define PROF_ADD(to, from, field) \
4697 (to)->field += qatomic_read(&((from)->field)); \
4700 #define PROF_MAX(to, from, field) \
4702 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4703 if (val__ > (to)->field) { \
4704 (to)->field = val__; \
4708 /* Pass in a zero'ed @prof */
4710 void tcg_profile_snapshot(TCGProfile
*prof
, bool counters
, bool table
)
4712 unsigned int n_ctxs
= qatomic_read(&tcg_cur_ctxs
);
4715 for (i
= 0; i
< n_ctxs
; i
++) {
4716 TCGContext
*s
= qatomic_read(&tcg_ctxs
[i
]);
4717 const TCGProfile
*orig
= &s
->prof
;
4720 PROF_ADD(prof
, orig
, cpu_exec_time
);
4721 PROF_ADD(prof
, orig
, tb_count1
);
4722 PROF_ADD(prof
, orig
, tb_count
);
4723 PROF_ADD(prof
, orig
, op_count
);
4724 PROF_MAX(prof
, orig
, op_count_max
);
4725 PROF_ADD(prof
, orig
, temp_count
);
4726 PROF_MAX(prof
, orig
, temp_count_max
);
4727 PROF_ADD(prof
, orig
, del_op_count
);
4728 PROF_ADD(prof
, orig
, code_in_len
);
4729 PROF_ADD(prof
, orig
, code_out_len
);
4730 PROF_ADD(prof
, orig
, search_out_len
);
4731 PROF_ADD(prof
, orig
, interm_time
);
4732 PROF_ADD(prof
, orig
, code_time
);
4733 PROF_ADD(prof
, orig
, la_time
);
4734 PROF_ADD(prof
, orig
, opt_time
);
4735 PROF_ADD(prof
, orig
, restore_count
);
4736 PROF_ADD(prof
, orig
, restore_time
);
4741 for (i
= 0; i
< NB_OPS
; i
++) {
4742 PROF_ADD(prof
, orig
, table_op_count
[i
]);
/* Snapshot only the scalar profiler counters (not the per-op table). */
4751 static void tcg_profile_snapshot_counters(TCGProfile
*prof
)
4753     tcg_profile_snapshot(prof
, true, false);
/* Snapshot only the per-opcode count table (not the scalar counters). */
4756 static void tcg_profile_snapshot_table(TCGProfile
*prof
)
4758     tcg_profile_snapshot(prof
, false, true);
/*
 * Append one "<opname> <count>" line per TCG opcode to @buf, using a
 * fresh snapshot of the per-op profile table aggregated across all
 * TCG contexts.  (CONFIG_PROFILER build.)
 */
4761 void tcg_dump_op_count(GString
*buf
)
4763     TCGProfile prof
 = {};
4766     tcg_profile_snapshot_table(&prof
);
4767     for (i
 = 0; i
 < NB_OPS
; i
++) {
4768         g_string_append_printf(buf
, "%s %" PRId64
 "\n", tcg_op_defs
[i
].name
,
4769                                prof
.table_op_count
[i
]);
/*
 * Sum the cpu_exec_time profiler counter across every active TCG
 * context and return the total.  Counters are read with qatomic_read
 * since other threads update them concurrently.  (CONFIG_PROFILER
 * build; the return statement is outside this extraction's view.)
 */
4773 int64_t tcg_cpu_exec_time(void)
4775     unsigned int n_ctxs
 = qatomic_read(&tcg_cur_ctxs
);
4779     for (i
 = 0; i
 < n_ctxs
; i
++) {
4780         const TCGContext
*s
 = qatomic_read(&tcg_ctxs
[i
]);
4781         const TCGProfile
*prof
 = &s
->prof
;
4783         ret
 += qatomic_read(&prof
->cpu_exec_time
);
/* Non-profiler build: report that per-op counts are unavailable. */
4788 void tcg_dump_op_count(GString
*buf
)
4790     g_string_append_printf(buf
, "[TCG profiler not compiled]\n");
/* Non-profiler build: exec-time accounting is unavailable; report error. */
4793 int64_t tcg_cpu_exec_time(void)
4795     error_report("%s: TCG profiler not compiled", __func__
);
4801 int tcg_gen_code(TCGContext
*s
, TranslationBlock
*tb
, target_ulong pc_start
)
4803 #ifdef CONFIG_PROFILER
4804 TCGProfile
*prof
= &s
->prof
;
4809 #ifdef CONFIG_PROFILER
4813 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4816 qatomic_set(&prof
->op_count
, prof
->op_count
+ n
);
4817 if (n
> prof
->op_count_max
) {
4818 qatomic_set(&prof
->op_count_max
, n
);
4822 qatomic_set(&prof
->temp_count
, prof
->temp_count
+ n
);
4823 if (n
> prof
->temp_count_max
) {
4824 qatomic_set(&prof
->temp_count_max
, n
);
4830 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP
)
4831 && qemu_log_in_addr_range(pc_start
))) {
4832 FILE *logfile
= qemu_log_trylock();
4834 fprintf(logfile
, "OP:\n");
4835 tcg_dump_ops(s
, logfile
, false);
4836 fprintf(logfile
, "\n");
4837 qemu_log_unlock(logfile
);
4842 #ifdef CONFIG_DEBUG_TCG
4843 /* Ensure all labels referenced have been emitted. */
4848 QSIMPLEQ_FOREACH(l
, &s
->labels
, next
) {
4849 if (unlikely(!l
->present
) && l
->refs
) {
4850 qemu_log_mask(CPU_LOG_TB_OP
,
4851 "$L%d referenced but not present.\n", l
->id
);
4859 #ifdef CONFIG_PROFILER
4860 qatomic_set(&prof
->opt_time
, prof
->opt_time
- profile_getclock());
4863 #ifdef USE_TCG_OPTIMIZATIONS
4867 #ifdef CONFIG_PROFILER
4868 qatomic_set(&prof
->opt_time
, prof
->opt_time
+ profile_getclock());
4869 qatomic_set(&prof
->la_time
, prof
->la_time
- profile_getclock());
4872 reachable_code_pass(s
);
4875 if (s
->nb_indirects
> 0) {
4877 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND
)
4878 && qemu_log_in_addr_range(pc_start
))) {
4879 FILE *logfile
= qemu_log_trylock();
4881 fprintf(logfile
, "OP before indirect lowering:\n");
4882 tcg_dump_ops(s
, logfile
, false);
4883 fprintf(logfile
, "\n");
4884 qemu_log_unlock(logfile
);
4888 /* Replace indirect temps with direct temps. */
4889 if (liveness_pass_2(s
)) {
4890 /* If changes were made, re-run liveness. */
4895 #ifdef CONFIG_PROFILER
4896 qatomic_set(&prof
->la_time
, prof
->la_time
+ profile_getclock());
4900 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT
)
4901 && qemu_log_in_addr_range(pc_start
))) {
4902 FILE *logfile
= qemu_log_trylock();
4904 fprintf(logfile
, "OP after optimization and liveness analysis:\n");
4905 tcg_dump_ops(s
, logfile
, true);
4906 fprintf(logfile
, "\n");
4907 qemu_log_unlock(logfile
);
4912 /* Initialize goto_tb jump offsets. */
4913 tb
->jmp_reset_offset
[0] = TB_JMP_OFFSET_INVALID
;
4914 tb
->jmp_reset_offset
[1] = TB_JMP_OFFSET_INVALID
;
4915 tb
->jmp_insn_offset
[0] = TB_JMP_OFFSET_INVALID
;
4916 tb
->jmp_insn_offset
[1] = TB_JMP_OFFSET_INVALID
;
4918 tcg_reg_alloc_start(s
);
4921 * Reset the buffer pointers when restarting after overflow.
4922 * TODO: Move this into translate-all.c with the rest of the
4923 * buffer management. Having only this done here is confusing.
4925 s
->code_buf
= tcg_splitwx_to_rw(tb
->tc
.ptr
);
4926 s
->code_ptr
= s
->code_buf
;
4928 #ifdef TCG_TARGET_NEED_LDST_LABELS
4929 QSIMPLEQ_INIT(&s
->ldst_labels
);
4931 #ifdef TCG_TARGET_NEED_POOL_LABELS
4932 s
->pool_labels
= NULL
;
4936 QTAILQ_FOREACH(op
, &s
->ops
, link
) {
4937 TCGOpcode opc
= op
->opc
;
4939 #ifdef CONFIG_PROFILER
4940 qatomic_set(&prof
->table_op_count
[opc
], prof
->table_op_count
[opc
] + 1);
4944 case INDEX_op_mov_i32
:
4945 case INDEX_op_mov_i64
:
4946 case INDEX_op_mov_vec
:
4947 tcg_reg_alloc_mov(s
, op
);
4949 case INDEX_op_dup_vec
:
4950 tcg_reg_alloc_dup(s
, op
);
4952 case INDEX_op_insn_start
:
4953 if (num_insns
>= 0) {
4954 size_t off
= tcg_current_code_size(s
);
4955 s
->gen_insn_end_off
[num_insns
] = off
;
4956 /* Assert that we do not overflow our stored offset. */
4957 assert(s
->gen_insn_end_off
[num_insns
] == off
);
4960 for (i
= 0; i
< TARGET_INSN_START_WORDS
; ++i
) {
4962 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4963 a
= deposit64(op
->args
[i
* 2], 32, 32, op
->args
[i
* 2 + 1]);
4967 s
->gen_insn_data
[num_insns
][i
] = a
;
4970 case INDEX_op_discard
:
4971 temp_dead(s
, arg_temp(op
->args
[0]));
4973 case INDEX_op_set_label
:
4974 tcg_reg_alloc_bb_end(s
, s
->reserved_regs
);
4975 tcg_out_label(s
, arg_label(op
->args
[0]));
4978 tcg_reg_alloc_call(s
, op
);
4980 case INDEX_op_exit_tb
:
4981 tcg_out_exit_tb(s
, op
->args
[0]);
4983 case INDEX_op_goto_tb
:
4984 tcg_out_goto_tb(s
, op
->args
[0]);
4986 case INDEX_op_dup2_vec
:
4987 if (tcg_reg_alloc_dup2(s
, op
)) {
4992 /* Sanity check that we've not introduced any unhandled opcodes. */
4993 tcg_debug_assert(tcg_op_supported(opc
));
4994 /* Note: in order to speed up the code, it would be much
4995 faster to have specialized register allocator functions for
4996 some common argument patterns */
4997 tcg_reg_alloc_op(s
, op
);
5000 /* Test for (pending) buffer overflow. The assumption is that any
5001 one operation beginning below the high water mark cannot overrun
5002 the buffer completely. Thus we can test for overflow after
5003 generating code without having to check during generation. */
5004 if (unlikely((void *)s
->code_ptr
> s
->code_gen_highwater
)) {
5007 /* Test for TB overflow, as seen by gen_insn_end_off. */
5008 if (unlikely(tcg_current_code_size(s
) > UINT16_MAX
)) {
5012 tcg_debug_assert(num_insns
>= 0);
5013 s
->gen_insn_end_off
[num_insns
] = tcg_current_code_size(s
);
5015 /* Generate TB finalization at the end of block */
5016 #ifdef TCG_TARGET_NEED_LDST_LABELS
5017 i
= tcg_out_ldst_finalize(s
);
5022 #ifdef TCG_TARGET_NEED_POOL_LABELS
5023 i
= tcg_out_pool_finalize(s
);
5028 if (!tcg_resolve_relocs(s
)) {
5032 #ifndef CONFIG_TCG_INTERPRETER
5033 /* flush instruction cache */
5034 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s
->code_buf
),
5035 (uintptr_t)s
->code_buf
,
5036 tcg_ptr_byte_diff(s
->code_ptr
, s
->code_buf
));
5039 return tcg_current_code_size(s
);
5042 #ifdef CONFIG_PROFILER
5043 void tcg_dump_info(GString
*buf
)
5045 TCGProfile prof
= {};
5046 const TCGProfile
*s
;
5048 int64_t tb_div_count
;
5051 tcg_profile_snapshot_counters(&prof
);
5053 tb_count
= s
->tb_count
;
5054 tb_div_count
= tb_count
? tb_count
: 1;
5055 tot
= s
->interm_time
+ s
->code_time
;
5057 g_string_append_printf(buf
, "JIT cycles %" PRId64
5058 " (%0.3f s at 2.4 GHz)\n",
5060 g_string_append_printf(buf
, "translated TBs %" PRId64
5061 " (aborted=%" PRId64
" %0.1f%%)\n",
5062 tb_count
, s
->tb_count1
- tb_count
,
5063 (double)(s
->tb_count1
- s
->tb_count
)
5064 / (s
->tb_count1
? s
->tb_count1
: 1) * 100.0);
5065 g_string_append_printf(buf
, "avg ops/TB %0.1f max=%d\n",
5066 (double)s
->op_count
/ tb_div_count
, s
->op_count_max
);
5067 g_string_append_printf(buf
, "deleted ops/TB %0.2f\n",
5068 (double)s
->del_op_count
/ tb_div_count
);
5069 g_string_append_printf(buf
, "avg temps/TB %0.2f max=%d\n",
5070 (double)s
->temp_count
/ tb_div_count
,
5072 g_string_append_printf(buf
, "avg host code/TB %0.1f\n",
5073 (double)s
->code_out_len
/ tb_div_count
);
5074 g_string_append_printf(buf
, "avg search data/TB %0.1f\n",
5075 (double)s
->search_out_len
/ tb_div_count
);
5077 g_string_append_printf(buf
, "cycles/op %0.1f\n",
5078 s
->op_count
? (double)tot
/ s
->op_count
: 0);
5079 g_string_append_printf(buf
, "cycles/in byte %0.1f\n",
5080 s
->code_in_len
? (double)tot
/ s
->code_in_len
: 0);
5081 g_string_append_printf(buf
, "cycles/out byte %0.1f\n",
5082 s
->code_out_len
? (double)tot
/ s
->code_out_len
: 0);
5083 g_string_append_printf(buf
, "cycles/search byte %0.1f\n",
5085 (double)tot
/ s
->search_out_len
: 0);
5089 g_string_append_printf(buf
, " gen_interm time %0.1f%%\n",
5090 (double)s
->interm_time
/ tot
* 100.0);
5091 g_string_append_printf(buf
, " gen_code time %0.1f%%\n",
5092 (double)s
->code_time
/ tot
* 100.0);
5093 g_string_append_printf(buf
, "optim./code time %0.1f%%\n",
5094 (double)s
->opt_time
/ (s
->code_time
?
5097 g_string_append_printf(buf
, "liveness/code time %0.1f%%\n",
5098 (double)s
->la_time
/ (s
->code_time
?
5099 s
->code_time
: 1) * 100.0);
5100 g_string_append_printf(buf
, "cpu_restore count %" PRId64
"\n",
5102 g_string_append_printf(buf
, " avg cycles %0.1f\n",
5104 (double)s
->restore_time
/ s
->restore_count
: 0);
/* Non-profiler build: report that JIT statistics are unavailable. */
5107 void tcg_dump_info(GString
*buf
)
5109     g_string_append_printf(buf
, "[TCG profiler not compiled]\n");
5113 #ifdef ELF_HOST_MACHINE
5114 /* In order to use this feature, the backend needs to do three things:
5116 (1) Define ELF_HOST_MACHINE to indicate both what value to
5117 put into the ELF image and to indicate support for the feature.
5119 (2) Define tcg_register_jit. This should create a buffer containing
5120 the contents of a .debug_frame section that describes the post-
5121 prologue unwind info for the tcg machine.
5123 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5126 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
5133 struct jit_code_entry
{
5134 struct jit_code_entry
*next_entry
;
5135 struct jit_code_entry
*prev_entry
;
5136 const void *symfile_addr
;
5137 uint64_t symfile_size
;
5140 struct jit_descriptor
{
5142 uint32_t action_flag
;
5143 struct jit_code_entry
*relevant_entry
;
5144 struct jit_code_entry
*first_entry
;
5147 void __jit_debug_register_code(void) __attribute__((noinline
));
5148 void __jit_debug_register_code(void)
5153 /* Must statically initialize the version, because GDB may check
5154 the version before we can set it. */
5155 struct jit_descriptor __jit_debug_descriptor
= { 1, 0, 0, 0 };
5157 /* End GDB interface. */
/*
 * Look up @str in the ELF string table @strtab and return its byte
 * offset.  Scanning starts at offset 1 (offset 0 is the mandatory
 * empty string in an ELF strtab).
 * NOTE(review): the loop advance and return statements fall outside
 * this extraction's view.
 */
5159 static int find_string(const char *strtab
, const char *str
)
5161     const char *p
 = strtab
 + 1;
5164         if (strcmp(p
, str
) == 0) {
/*
 * Build a fake in-memory ELF image describing code_gen_buffer (one .text
 * section marked SHT_NOBITS, minimal DWARF .debug_info/.debug_abbrev, the
 * caller-supplied .debug_frame) and hand it to GDB via the JIT interface.
 * NOTE(review): this extraction has dropped many interior lines (the
 * ElfImage struct layout, the phdr initializer, several header fields and
 * struct closers) — compare against the complete upstream function before
 * relying on any reconstruction of the missing parts.
 */
5171 static void tcg_register_jit_int(const void *buf_ptr
, size_t buf_size
,
5172 const void *debug_frame
,
5173 size_t debug_frame_size
)
/* Packed DWARF .debug_info payload: one compile unit + one subprogram DIE. */
5175 struct __attribute__((packed
)) DebugInfo
{
/* Address range covered by the compile unit (the whole code buffer). */
5182 uintptr_t cu_low_pc
;
5183 uintptr_t cu_high_pc
;
/* Address range of the single "code_gen_buffer" function DIE. */
5186 uintptr_t fn_low_pc
;
5187 uintptr_t fn_high_pc
;
5196 struct DebugInfo di
;
5201 struct ElfImage
*img
;
/* Compile-time template for the image; runtime-only fields (addresses,
   sizes derived from buf/buf_size) are patched in below after the copy. */
5203 static const struct ElfImage img_template
= {
5205 .e_ident
[EI_MAG0
] = ELFMAG0
,
5206 .e_ident
[EI_MAG1
] = ELFMAG1
,
5207 .e_ident
[EI_MAG2
] = ELFMAG2
,
5208 .e_ident
[EI_MAG3
] = ELFMAG3
,
5209 .e_ident
[EI_CLASS
] = ELF_CLASS
,
5210 .e_ident
[EI_DATA
] = ELF_DATA
,
5211 .e_ident
[EI_VERSION
] = EV_CURRENT
,
5213 .e_machine
= ELF_HOST_MACHINE
,
5214 .e_version
= EV_CURRENT
,
5215 .e_phoff
= offsetof(struct ElfImage
, phdr
),
5216 .e_shoff
= offsetof(struct ElfImage
, shdr
),
/* NOTE(review): e_ehsize is set from sizeof(ElfW(Shdr)), not Ehdr, as in
   upstream; GDB does not appear to depend on it — confirm before "fixing". */
5217 .e_ehsize
= sizeof(ElfW(Shdr
)),
5218 .e_phentsize
= sizeof(ElfW(Phdr
)),
5220 .e_shentsize
= sizeof(ElfW(Shdr
)),
5221 .e_shnum
= ARRAY_SIZE(img
->shdr
),
/* Section-name string table is the last section header (.strtab). */
5222 .e_shstrndx
= ARRAY_SIZE(img
->shdr
) - 1,
5223 #ifdef ELF_HOST_FLAGS
5224 .e_flags
= ELF_HOST_FLAGS
,
5227 .e_ident
[EI_OSABI
] = ELF_OSABI
,
/* Section header table: [0] is the mandatory null section. */
5235 [0] = { .sh_type
= SHT_NULL
},
5236 /* Trick: The contents of code_gen_buffer are not present in
5237 this fake ELF file; that got allocated elsewhere. Therefore
5238 we mark .text as SHT_NOBITS (similar to .bss) so that readers
5239 will not look for contents. We can record any address. */
5241 .sh_type
= SHT_NOBITS
,
5242 .sh_flags
= SHF_EXECINSTR
| SHF_ALLOC
,
5244 [2] = { /* .debug_info */
5245 .sh_type
= SHT_PROGBITS
,
5246 .sh_offset
= offsetof(struct ElfImage
, di
),
5247 .sh_size
= sizeof(struct DebugInfo
),
5249 [3] = { /* .debug_abbrev */
5250 .sh_type
= SHT_PROGBITS
,
5251 .sh_offset
= offsetof(struct ElfImage
, da
),
5252 .sh_size
= sizeof(img
->da
),
/* .debug_frame data is appended immediately after the ElfImage struct. */
5254 [4] = { /* .debug_frame */
5255 .sh_type
= SHT_PROGBITS
,
5256 .sh_offset
= sizeof(struct ElfImage
),
5258 [5] = { /* .symtab */
5259 .sh_type
= SHT_SYMTAB
,
5260 .sh_offset
= offsetof(struct ElfImage
, sym
),
5261 .sh_size
= sizeof(img
->sym
),
/* sh_link of a SYMTAB points at its string table (the last section). */
5263 .sh_link
= ARRAY_SIZE(img
->shdr
) - 1,
5264 .sh_entsize
= sizeof(ElfW(Sym
)),
5266 [6] = { /* .strtab */
5267 .sh_type
= SHT_STRTAB
,
5268 .sh_offset
= offsetof(struct ElfImage
, str
),
5269 .sh_size
= sizeof(img
->str
),
/* Symbol table: sym[0] is the mandatory null symbol. */
5273 [1] = { /* code_gen_buffer */
5274 .st_info
= ELF_ST_INFO(STB_GLOBAL
, STT_FUNC
),
/* DWARF initial-length field excludes its own 4 bytes. */
5279 .len
= sizeof(struct DebugInfo
) - 4,
5281 .ptr_size
= sizeof(void *),
5283 .cu_lang
= 0x8001, /* DW_LANG_Mips_Assembler */
5285 .fn_name
= "code_gen_buffer"
/* .debug_abbrev bytes: abbrev 1 = compile unit, abbrev 2 = subprogram. */
5288 1, /* abbrev number (the cu) */
5289 0x11, 1, /* DW_TAG_compile_unit, has children */
5290 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
5291 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5292 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5293 0, 0, /* end of abbrev */
5294 2, /* abbrev number (the fn) */
5295 0x2e, 0, /* DW_TAG_subprogram, no children */
5296 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
5297 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5298 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5299 0, 0, /* end of abbrev */
5300 0 /* no more abbrev */
/* String table; offsets into this are returned by find_string() below. */
5302 .str
= "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5303 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5306 /* We only need a single jit entry; statically allocate it. */
5307 static struct jit_code_entry one_entry
;
5309 uintptr_t buf
= (uintptr_t)buf_ptr
;
/* Total image = fixed ElfImage header block + trailing .debug_frame copy. */
5310 size_t img_size
= sizeof(struct ElfImage
) + debug_frame_size
;
5311 DebugFrameHeader
*dfh
;
/* Copy the template, then patch in the runtime addresses and sizes. */
5313 img
= g_malloc(img_size
);
5314 *img
= img_template
;
5316 img
->phdr
.p_vaddr
= buf
;
5317 img
->phdr
.p_paddr
= buf
;
5318 img
->phdr
.p_memsz
= buf_size
;
/* Resolve section names and fill in the .text address/size. */
5320 img
->shdr
[1].sh_name
= find_string(img
->str
, ".text");
5321 img
->shdr
[1].sh_addr
= buf
;
5322 img
->shdr
[1].sh_size
= buf_size
;
5324 img
->shdr
[2].sh_name
= find_string(img
->str
, ".debug_info");
5325 img
->shdr
[3].sh_name
= find_string(img
->str
, ".debug_abbrev");
5327 img
->shdr
[4].sh_name
= find_string(img
->str
, ".debug_frame");
5328 img
->shdr
[4].sh_size
= debug_frame_size
;
5330 img
->shdr
[5].sh_name
= find_string(img
->str
, ".symtab");
5331 img
->shdr
[6].sh_name
= find_string(img
->str
, ".strtab");
/* The one global symbol spans the whole code buffer. */
5333 img
->sym
[1].st_name
= find_string(img
->str
, "code_gen_buffer");
5334 img
->sym
[1].st_value
= buf
;
5335 img
->sym
[1].st_size
= buf_size
;
/* DWARF compile-unit and subprogram DIEs both cover [buf, buf+buf_size). */
5337 img
->di
.cu_low_pc
= buf
;
5338 img
->di
.cu_high_pc
= buf
+ buf_size
;
5339 img
->di
.fn_low_pc
= buf
;
5340 img
->di
.fn_high_pc
= buf
+ buf_size
;
/* Append the backend-provided .debug_frame and relocate its FDE to buf. */
5342 dfh
= (DebugFrameHeader
*)(img
+ 1);
5343 memcpy(dfh
, debug_frame
, debug_frame_size
);
5344 dfh
->fde
.func_start
= buf
;
5345 dfh
->fde
.func_len
= buf_size
;
5348 /* Enable this block to be able to debug the ELF image file creation.
5349 One can use readelf, objdump, or other inspection utilities. */
5351 g_autofree
char *jit
= g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5352 FILE *f
= fopen(jit
, "w+b");
/* NOTE(review): the comparison against img_size is bogus (fwrite with
   nmemb==1 returns 0 or 1); it exists only to consume the return value. */
5354 if (fwrite(img
, img_size
, 1, f
) != img_size
) {
5355 /* Avoid stupid unused return value warning for fwrite. */
/* Publish the image through the GDB JIT interface and signal GDB. */
5362 one_entry
.symfile_addr
= img
;
5363 one_entry
.symfile_size
= img_size
;
5365 __jit_debug_descriptor
.action_flag
= JIT_REGISTER_FN
;
5366 __jit_debug_descriptor
.relevant_entry
= &one_entry
;
5367 __jit_debug_descriptor
.first_entry
= &one_entry
;
5368 __jit_debug_register_code();
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/* ELF_HOST_MACHINE is not defined: registering with GDB is a no-op. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
/* Public entry point when the host has no ELF/GDB JIT support: no-op. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5383 #endif /* ELF_HOST_MACHINE */
5385 #if !TCG_TARGET_MAYBE_vec
5386 void tcg_expand_vec_op(TCGOpcode o
, TCGType t
, unsigned e
, TCGArg a0
, ...)
5388 g_assert_not_reached();