diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8f26916b99..be2c33c400 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -30,8 +30,10 @@
 /* Define to jump the ELF file used to communicate with GDB. */
 #undef DEBUG_JIT
 
+#include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #include "qemu/host-utils.h"
+#include "qemu/qemu-print.h"
 #include "qemu/timer.h"
 
 /* Note: the long term plan is to reduce the dependencies on the QEMU
@@ -43,6 +45,10 @@
 #include "exec/cpu-common.h"
 #include "exec/exec-all.h"
 
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/boards.h"
+#endif
+
 #include "tcg-op.h"
 
 #if UINTPTR_MAX == UINT32_MAX
@@ -65,7 +71,7 @@ static void tcg_target_init(TCGContext *s);
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
 static void tcg_target_qemu_prologue(TCGContext *s);
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend);
 
 /* The CIE and FDE header definitions will be common to all hosts. */
@@ -101,16 +107,37 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
                                            const char *ct_str, TCGType type);
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                        intptr_t arg2);
-static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args);
 #if TCG_TARGET_MAYBE_vec
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg dst, TCGReg src);
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg dst, TCGReg base, intptr_t offset);
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
+                             TCGReg dst, tcg_target_long arg);
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                            unsigned vece, const TCGArg *args,
                            const int *const_args);
 #else
+static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                                   TCGReg dst, TCGReg src)
+{
+    g_assert_not_reached();
+}
+static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                                    TCGReg dst, TCGReg base, intptr_t offset)
+{
+    g_assert_not_reached();
+}
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
+                                    TCGReg dst, tcg_target_long arg)
+{
+    g_assert_not_reached();
+}
 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                   unsigned vece, const TCGArg *args,
                                   const int *const_args)
@@ -126,7 +153,7 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                   const TCGArgConstraint *arg_ct);
 #ifdef TCG_TARGET_NEED_LDST_LABELS
-static bool tcg_out_ldst_finalize(TCGContext *s);
+static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
 
 #define TCG_HIGHWATER 1024
@@ -261,35 +288,17 @@ static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                           TCGLabel *l, intptr_t addend)
 {
-    TCGRelocation *r;
+    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 
-    if (l->has_value) {
-        /* FIXME: This may break 
relocations on RISC targets that - modify instruction fields in place. The caller may not have - written the initial value. */ - patch_reloc(code_ptr, type, l->u.value, addend); - } else { - /* add a new relocation entry */ - r = tcg_malloc(sizeof(TCGRelocation)); - r->type = type; - r->ptr = code_ptr; - r->addend = addend; - r->next = l->u.first_reloc; - l->u.first_reloc = r; - } + r->type = type; + r->ptr = code_ptr; + r->addend = addend; + QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next); } static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) { - intptr_t value = (intptr_t)ptr; - TCGRelocation *r; - tcg_debug_assert(!l->has_value); - - for (r = l->u.first_reloc; r != NULL; r = r->next) { - patch_reloc(r->ptr, r->type, value, r->addend); - } - l->has_value = 1; l->u.value_ptr = ptr; } @@ -299,13 +308,32 @@ TCGLabel *gen_new_label(void) TCGContext *s = tcg_ctx; TCGLabel *l = tcg_malloc(sizeof(TCGLabel)); - *l = (TCGLabel){ - .id = s->nb_labels++ - }; + memset(l, 0, sizeof(TCGLabel)); + l->id = s->nb_labels++; + QSIMPLEQ_INIT(&l->relocs); + + QSIMPLEQ_INSERT_TAIL(&s->labels, l, next); return l; } +static bool tcg_resolve_relocs(TCGContext *s) +{ + TCGLabel *l; + + QSIMPLEQ_FOREACH(l, &s->labels, next) { + TCGRelocation *r; + uintptr_t value = l->u.value; + + QSIMPLEQ_FOREACH(r, &l->relocs, next) { + if (!patch_reloc(r->ptr, r->type, value, r->addend)) { + return false; + } + } + } + return true; +} + static void set_jmp_reset_offset(TCGContext *s, int which) { size_t off = tcg_current_code_size(s); @@ -596,6 +624,10 @@ static size_t tcg_n_regions(void) size_t i; /* Use a single region if all we have is one vCPU thread */ +#if !defined(CONFIG_USER_ONLY) + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; +#endif if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) { return 1; } @@ -728,6 +760,7 @@ void tcg_register_thread(void) #else void tcg_register_thread(void) { + MachineState *ms = MACHINE(qdev_get_machine()); TCGContext *s = g_malloc(sizeof(*s)); unsigned int i, n; bool err; @@ -745,7 +778,7 @@ void tcg_register_thread(void) /* Claim an entry in tcg_ctxs */ n = atomic_fetch_inc(&n_tcg_ctxs); - g_assert(n < max_cpus); + g_assert(n < ms->smp.max_cpus); atomic_set(&tcg_ctxs[n], s); tcg_ctx = s; @@ -955,6 +988,8 @@ void tcg_context_init(TCGContext *s) tcg_ctxs = &tcg_ctx; n_tcg_ctxs = 1; #else + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; tcg_ctxs = g_new(TCGContext *, max_cpus); #endif @@ -1016,8 +1051,8 @@ void tcg_prologue_init(TCGContext *s) #ifdef TCG_TARGET_NEED_POOL_LABELS /* Allow the prologue to put e.g. guest_base into a pool entry. 
*/ { - bool ok = tcg_out_pool_finalize(s); - tcg_debug_assert(ok); + int result = tcg_out_pool_finalize(s); + tcg_debug_assert(result == 0); } #endif @@ -1089,6 +1124,7 @@ void tcg_func_start(TCGContext *s) QTAILQ_INIT(&s->ops); QTAILQ_INIT(&s->free_ops); + QSIMPLEQ_INIT(&s->labels); } static inline TCGTemp *tcg_temp_alloc(TCGContext *s) @@ -1416,6 +1452,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i32; case INDEX_op_sextract_i32: return TCG_TARGET_HAS_sextract_i32; + case INDEX_op_extract2_i32: + return TCG_TARGET_HAS_extract2_i32; case INDEX_op_add2_i32: return TCG_TARGET_HAS_add2_i32; case INDEX_op_sub2_i32: @@ -1513,6 +1551,8 @@ bool tcg_op_supported(TCGOpcode op) return TCG_TARGET_HAS_extract_i64; case INDEX_op_sextract_i64: return TCG_TARGET_HAS_sextract_i64; + case INDEX_op_extract2_i64: + return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: return TCG_TARGET_HAS_extrl_i64_i32; case INDEX_op_extrh_i64_i32: @@ -1571,6 +1611,7 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_mov_vec: case INDEX_op_dup_vec: case INDEX_op_dupi_vec: + case INDEX_op_dupm_vec: case INDEX_op_ld_vec: case INDEX_op_st_vec: case INDEX_op_add_vec: @@ -1586,6 +1627,8 @@ bool tcg_op_supported(TCGOpcode op) return have_vec && TCG_TARGET_HAS_not_vec; case INDEX_op_neg_vec: return have_vec && TCG_TARGET_HAS_neg_vec; + case INDEX_op_abs_vec: + return have_vec && TCG_TARGET_HAS_abs_vec; case INDEX_op_andc_vec: return have_vec && TCG_TARGET_HAS_andc_vec; case INDEX_op_orc_vec: @@ -1604,6 +1647,20 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: return have_vec && TCG_TARGET_HAS_shv_vec; + case INDEX_op_ssadd_vec: + case INDEX_op_usadd_vec: + case INDEX_op_sssub_vec: + case INDEX_op_ussub_vec: + return have_vec && TCG_TARGET_HAS_sat_vec; + case INDEX_op_smin_vec: + case INDEX_op_umin_vec: + case INDEX_op_smax_vec: + case INDEX_op_umax_vec: + return have_vec && TCG_TARGET_HAS_minmax_vec; + case INDEX_op_bitsel_vec: + return have_vec && TCG_TARGET_HAS_bitsel_vec; + case INDEX_op_cmpsel_vec: + return have_vec && TCG_TARGET_HAS_cmpsel_vec; default: tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); @@ -1884,7 +1941,21 @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { [MO_ALIGN_64 >> MO_ASHIFT] = "al64+", }; -void tcg_dump_ops(TCGContext *s) +static inline bool tcg_regset_single(TCGRegSet d) +{ + return (d & (d - 1)) == 0; +} + +static inline TCGReg tcg_regset_first(TCGRegSet d) +{ + if (TCG_TARGET_NB_REGS <= 32) { + return ctz32(d); + } else { + return ctz64(d); + } +} + +static void tcg_dump_ops(TCGContext *s, bool have_prefs) { char buf[128]; TCGOp *op; @@ -1899,6 +1970,7 @@ void tcg_dump_ops(TCGContext *s) def = &tcg_op_defs[c]; if (c == INDEX_op_insn_start) { + nb_oargs = 0; col += qemu_log("\n ----"); for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { @@ -1969,6 +2041,7 @@ void tcg_dump_ops(TCGContext *s) case INDEX_op_setcond_i64: case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: + case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) && cond_name[op->args[k]]) { col += qemu_log(",%s", cond_name[op->args[k++]]); @@ -2018,12 +2091,15 @@ void tcg_dump_ops(TCGContext *s) col += qemu_log("%s$0x%" TCG_PRIlx, k ? 
"," : "", op->args[k]); } } - if (op->life) { - unsigned life = op->life; - for (; col < 48; ++col) { + if (have_prefs || op->life) { + for (; col < 40; ++col) { putc(' ', qemu_logfile); } + } + + if (op->life) { + unsigned life = op->life; if (life & (SYNC_ARG * 3)) { qemu_log(" sync:"); @@ -2043,6 +2119,33 @@ void tcg_dump_ops(TCGContext *s) } } } + + if (have_prefs) { + for (i = 0; i < nb_oargs; ++i) { + TCGRegSet set = op->output_pref[i]; + + if (i == 0) { + qemu_log(" pref="); + } else { + qemu_log(","); + } + if (set == 0) { + qemu_log("none"); + } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) { + qemu_log("all"); +#ifdef CONFIG_DEBUG_TCG + } else if (tcg_regset_single(set)) { + TCGReg reg = tcg_regset_first(set); + qemu_log("%s", tcg_target_reg_names[reg]); +#endif + } else if (TCG_TARGET_NB_REGS <= 32) { + qemu_log("%#x", (uint32_t)set); + } else { + qemu_log("%#" PRIx64, (uint64_t)set); + } + } + } + qemu_log("\n"); } } @@ -2168,6 +2271,26 @@ static void process_op_defs(TCGContext *s) void tcg_op_remove(TCGContext *s, TCGOp *op) { + TCGLabel *label; + + switch (op->opc) { + case INDEX_op_br: + label = arg_label(op->args[0]); + label->refs--; + break; + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + label = arg_label(op->args[3]); + label->refs--; + break; + case INDEX_op_brcond2_i32: + label = arg_label(op->args[5]); + label->refs--; + break; + default: + break; + } + QTAILQ_REMOVE(&s->ops, op, link); QTAILQ_INSERT_TAIL(&s->free_ops, op, link); s->nb_ops--; @@ -2202,59 +2325,195 @@ TCGOp *tcg_emit_op(TCGOpcode opc) return op; } -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) { TCGOp *new_op = tcg_op_alloc(opc); QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, int nargs) +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) { TCGOp *new_op = tcg_op_alloc(opc); QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } +/* Reachable analysis : remove unreachable code. */ +static void reachable_code_pass(TCGContext *s) +{ + TCGOp *op, *op_next; + bool dead = false; + + QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { + bool remove = dead; + TCGLabel *label; + int call_flags; + + switch (op->opc) { + case INDEX_op_set_label: + label = arg_label(op->args[0]); + if (label->refs == 0) { + /* + * While there is an occasional backward branch, virtually + * all branches generated by the translators are forward. + * Which means that generally we will have already removed + * all references to the label that will be, and there is + * little to be gained by iterating. + */ + remove = true; + } else { + /* Once we see a label, insns become live again. */ + dead = false; + remove = false; + + /* + * Optimization can fold conditional branches to unconditional. + * If we find a label with one reference which is preceded by + * an unconditional branch to it, remove both. This needed to + * wait until the dead code in between them was removed. + */ + if (label->refs == 1) { + TCGOp *op_prev = QTAILQ_PREV(op, link); + if (op_prev->opc == INDEX_op_br && + label == arg_label(op_prev->args[0])) { + tcg_op_remove(s, op_prev); + remove = true; + } + } + } + break; + + case INDEX_op_br: + case INDEX_op_exit_tb: + case INDEX_op_goto_ptr: + /* Unconditional branches; everything following is dead. 
*/ + dead = true; + break; + + case INDEX_op_call: + /* Notice noreturn helper calls, raising exceptions. */ + call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; + if (call_flags & TCG_CALL_NO_RETURN) { + dead = true; + } + break; + + case INDEX_op_insn_start: + /* Never remove -- we need to keep these for unwind. */ + remove = false; + break; + + default: + break; + } + + if (remove) { + tcg_op_remove(s, op); + } + } +} + #define TS_DEAD 1 #define TS_MEM 2 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n))) #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n))) +/* For liveness_pass_1, the register preferences for a given temp. */ +static inline TCGRegSet *la_temp_pref(TCGTemp *ts) +{ + return ts->state_ptr; +} + +/* For liveness_pass_1, reset the preferences for a given temp to the + * maximal regset for its type. + */ +static inline void la_reset_pref(TCGTemp *ts) +{ + *la_temp_pref(ts) + = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]); +} + /* liveness analysis: end of function: all temps are dead, and globals should be in memory. */ -static void tcg_la_func_end(TCGContext *s) +static void la_func_end(TCGContext *s, int ng, int nt) { - int ng = s->nb_globals; - int nt = s->nb_temps; int i; for (i = 0; i < ng; ++i) { s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); } for (i = ng; i < nt; ++i) { s->temps[i].state = TS_DEAD; + la_reset_pref(&s->temps[i]); } } /* liveness analysis: end of basic block: all temps are dead, globals and local temps should be in memory. */ -static void tcg_la_bb_end(TCGContext *s) +static void la_bb_end(TCGContext *s, int ng, int nt) { - int ng = s->nb_globals; - int nt = s->nb_temps; int i; for (i = 0; i < ng; ++i) { s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); } for (i = ng; i < nt; ++i) { s->temps[i].state = (s->temps[i].temp_local ? TS_DEAD | TS_MEM : TS_DEAD); + la_reset_pref(&s->temps[i]); + } +} + +/* liveness analysis: sync globals back to memory. */ +static void la_global_sync(TCGContext *s, int ng) +{ + int i; + + for (i = 0; i < ng; ++i) { + int state = s->temps[i].state; + s->temps[i].state = state | TS_MEM; + if (state == TS_DEAD) { + /* If the global was previously dead, reset prefs. */ + la_reset_pref(&s->temps[i]); + } + } +} + +/* liveness analysis: sync globals back to memory and kill. */ +static void la_global_kill(TCGContext *s, int ng) +{ + int i; + + for (i = 0; i < ng; i++) { + s->temps[i].state = TS_DEAD | TS_MEM; + la_reset_pref(&s->temps[i]); + } +} + +/* liveness analysis: note live globals crossing calls. */ +static void la_cross_call(TCGContext *s, int nt) +{ + TCGRegSet mask = ~tcg_target_call_clobber_regs; + int i; + + for (i = 0; i < nt; i++) { + TCGTemp *ts = &s->temps[i]; + if (!(ts->state & TS_DEAD)) { + TCGRegSet *pset = la_temp_pref(ts); + TCGRegSet set = *pset; + + set &= mask; + /* If the combination is not possible, restart. */ + if (set == 0) { + set = tcg_target_available_regs[ts->type] & mask; + } + *pset = set; + } } } @@ -2264,16 +2523,25 @@ static void tcg_la_bb_end(TCGContext *s) static void liveness_pass_1(TCGContext *s) { int nb_globals = s->nb_globals; + int nb_temps = s->nb_temps; TCGOp *op, *op_prev; + TCGRegSet *prefs; + int i; - tcg_la_func_end(s); + prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); + for (i = 0; i < nb_temps; ++i) { + s->temps[i].state_ptr = prefs + i; + } + + /* ??? Should be redundant with the exit_tb that ends the TB. 
*/ + la_func_end(s, nb_globals, nb_temps); - QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) { - int i, nb_iargs, nb_oargs; + QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { + int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; bool have_opc_new2; TCGLifeData arg_life = 0; - TCGTemp *arg_ts; + TCGTemp *ts; TCGOpcode opc = op->opc; const TCGOpDef *def = &tcg_op_defs[opc]; @@ -2281,6 +2549,7 @@ static void liveness_pass_1(TCGContext *s) case INDEX_op_call: { int call_flags; + int nb_call_regs; nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); @@ -2289,53 +2558,74 @@ static void liveness_pass_1(TCGContext *s) /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state != TS_DEAD) { + ts = arg_temp(op->args[i]); + if (ts->state != TS_DEAD) { goto do_not_remove_call; } } goto do_remove; - } else { - do_not_remove_call: + } + do_not_remove_call: - /* output args are dead */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } - if (arg_ts->state & TS_MEM) { - arg_life |= SYNC_ARG << i; - } - arg_ts->state = TS_DEAD; + /* Output args are dead. */ + for (i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; } + if (ts->state & TS_MEM) { + arg_life |= SYNC_ARG << i; + } + ts->state = TS_DEAD; + la_reset_pref(ts); - if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | - TCG_CALL_NO_READ_GLOBALS))) { - /* globals should go back to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state = TS_DEAD | TS_MEM; - } - } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { - /* globals should be synced to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state |= TS_MEM; - } + /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ + op->output_pref[i] = 0; + } + + if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | + TCG_CALL_NO_READ_GLOBALS))) { + la_global_kill(s, nb_globals); + } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { + la_global_sync(s, nb_globals); + } + + /* Record arguments that die in this helper. */ + for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + ts = arg_temp(op->args[i]); + if (ts && ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; } + } - /* record arguments that die in this helper */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts && arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } + /* For all live registers, remove call-clobbered prefs. */ + la_cross_call(s, nb_temps); + + nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); + + /* Input arguments are live for preceding opcodes. */ + for (i = 0; i < nb_iargs; i++) { + ts = arg_temp(op->args[i + nb_oargs]); + if (ts && ts->state & TS_DEAD) { + /* For those arguments that die, and will be allocated + * in registers, clear the register set for that arg, + * to be filled in below. For args that will be on + * the stack, reset to any available reg. + */ + *la_temp_pref(ts) + = (i < nb_call_regs ? 0 : + tcg_target_available_regs[ts->type]); + ts->state &= ~TS_DEAD; } - /* input arguments are live for preceding opcodes */ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts) { - arg_ts->state &= ~TS_DEAD; - } + } + + /* For each input argument, add its input register to prefs. 
+ If a temp is used once, this produces a single set bit. */ + for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { + ts = arg_temp(op->args[i + nb_oargs]); + if (ts) { + tcg_regset_set_reg(*la_temp_pref(ts), + tcg_target_call_iarg_regs[i]); } } } @@ -2344,7 +2634,9 @@ static void liveness_pass_1(TCGContext *s) break; case INDEX_op_discard: /* mark the temporary as dead */ - arg_temp(op->args[0])->state = TS_DEAD; + ts = arg_temp(op->args[0]); + ts->state = TS_DEAD; + la_reset_pref(ts); break; case INDEX_op_add2_i32: @@ -2439,43 +2731,96 @@ static void liveness_pass_1(TCGContext *s) goto do_not_remove; } } - do_remove: - tcg_op_remove(s, op); - } else { - do_not_remove: - /* output args are dead */ - for (i = 0; i < nb_oargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } - if (arg_ts->state & TS_MEM) { - arg_life |= SYNC_ARG << i; - } - arg_ts->state = TS_DEAD; + goto do_remove; + } + goto do_not_remove; + + do_remove: + tcg_op_remove(s, op); + break; + + do_not_remove: + for (i = 0; i < nb_oargs; i++) { + ts = arg_temp(op->args[i]); + + /* Remember the preference of the uses that followed. */ + op->output_pref[i] = *la_temp_pref(ts); + + /* Output args are dead. */ + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } + if (ts->state & TS_MEM) { + arg_life |= SYNC_ARG << i; } + ts->state = TS_DEAD; + la_reset_pref(ts); + } - /* if end of basic block, update */ - if (def->flags & TCG_OPF_BB_END) { - tcg_la_bb_end(s); - } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { - /* globals should be synced to memory */ - for (i = 0; i < nb_globals; i++) { - s->temps[i].state |= TS_MEM; - } + /* If end of basic block, update. */ + if (def->flags & TCG_OPF_BB_EXIT) { + la_func_end(s, nb_globals, nb_temps); + } else if (def->flags & TCG_OPF_BB_END) { + la_bb_end(s, nb_globals, nb_temps); + } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + la_global_sync(s, nb_globals); + if (def->flags & TCG_OPF_CALL_CLOBBER) { + la_cross_call(s, nb_temps); } + } - /* record arguments that die in this opcode */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg_ts = arg_temp(op->args[i]); - if (arg_ts->state & TS_DEAD) { - arg_life |= DEAD_ARG << i; - } + /* Record arguments that die in this opcode. */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + arg_life |= DEAD_ARG << i; + } + } + + /* Input arguments are live for preceding opcodes. */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + ts = arg_temp(op->args[i]); + if (ts->state & TS_DEAD) { + /* For operands that were dead, initially allow + all regs for the type. */ + *la_temp_pref(ts) = tcg_target_available_regs[ts->type]; + ts->state &= ~TS_DEAD; + } + } + + /* Incorporate constraints for this operand. */ + switch (opc) { + case INDEX_op_mov_i32: + case INDEX_op_mov_i64: + /* Note that these are TCG_OPF_NOT_PRESENT and do not + have proper constraints. That said, special case + moves to propagate preferences backward. 
*/ + if (IS_DEAD_ARG(1)) { + *la_temp_pref(arg_temp(op->args[0])) + = *la_temp_pref(arg_temp(op->args[1])); } - /* input arguments are live for preceding opcodes */ + break; + + default: for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg_temp(op->args[i])->state &= ~TS_DEAD; + const TCGArgConstraint *ct = &def->args_ct[i]; + TCGRegSet set, *pset; + + ts = arg_temp(op->args[i]); + pset = la_temp_pref(ts); + set = *pset; + + set &= ct->u.regs; + if (ct->ct & TCG_CT_IALIAS) { + set &= op->output_pref[ct->alias_index]; + } + /* If the combination is not possible, restart. */ + if (set == 0) { + set = ct->u.regs; + } + *pset = set; } + break; } break; } @@ -2549,7 +2894,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_ld_i32 : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, lopc); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -2618,7 +2963,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, sopc); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); @@ -2726,7 +3071,7 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) s->current_frame_offset += sizeof(tcg_target_long); } -static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet); +static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); /* Mark a temporary as free or dead. If 'free_or_dead' is negative, mark it free; otherwise mark it dead. */ @@ -2754,8 +3099,8 @@ static inline void temp_dead(TCGContext *s, TCGTemp *ts) registers needs to be allocated to store a constant. If 'free_or_dead' is non-zero, subsequently release the temporary; if it is positive, the temp is dead; if it is negative, the temp is free. */ -static void temp_sync(TCGContext *s, TCGTemp *ts, - TCGRegSet allocated_regs, int free_or_dead) +static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs, + TCGRegSet preferred_regs, int free_or_dead) { if (ts->fixed_reg) { return; @@ -2775,7 +3120,7 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, break; } temp_load(s, ts, tcg_target_available_regs[ts->type], - allocated_regs); + allocated_regs, preferred_regs); /* fallthrough */ case TEMP_VAL_REG: @@ -2802,35 +3147,76 @@ static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs) { TCGTemp *ts = s->reg_to_temp[reg]; if (ts != NULL) { - temp_sync(s, ts, allocated_regs, -1); + temp_sync(s, ts, allocated_regs, 0, -1); } } -/* Allocate a register belonging to reg1 & ~reg2 */ -static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, - TCGRegSet allocated_regs, bool rev) +/** + * tcg_reg_alloc: + * @required_regs: Set of registers in which we must allocate. + * @allocated_regs: Set of registers which must be avoided. + * @preferred_regs: Set of registers we should prefer. + * @rev: True if we search the registers in "indirect" order. + * + * The allocated register must be in @required_regs & ~@allocated_regs, + * but if we can put it in @preferred_regs we may save a move later. 
+ */ +static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, + TCGRegSet allocated_regs, + TCGRegSet preferred_regs, bool rev) { - int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order); + int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order); + TCGRegSet reg_ct[2]; const int *order; - TCGReg reg; - TCGRegSet reg_ct; - reg_ct = desired_regs & ~allocated_regs; + reg_ct[1] = required_regs & ~allocated_regs; + tcg_debug_assert(reg_ct[1] != 0); + reg_ct[0] = reg_ct[1] & preferred_regs; + + /* Skip the preferred_regs option if it cannot be satisfied, + or if the preference made no difference. */ + f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; + order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; - /* first try free registers */ - for(i = 0; i < n; i++) { - reg = order[i]; - if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL) - return reg; + /* Try free registers, preferences first. */ + for (j = f; j < 2; j++) { + TCGRegSet set = reg_ct[j]; + + if (tcg_regset_single(set)) { + /* One register in the set. */ + TCGReg reg = tcg_regset_first(set); + if (s->reg_to_temp[reg] == NULL) { + return reg; + } + } else { + for (i = 0; i < n; i++) { + TCGReg reg = order[i]; + if (s->reg_to_temp[reg] == NULL && + tcg_regset_test_reg(set, reg)) { + return reg; + } + } + } } - /* XXX: do better spill choice */ - for(i = 0; i < n; i++) { - reg = order[i]; - if (tcg_regset_test_reg(reg_ct, reg)) { + /* We must spill something. */ + for (j = f; j < 2; j++) { + TCGRegSet set = reg_ct[j]; + + if (tcg_regset_single(set)) { + /* One register in the set. */ + TCGReg reg = tcg_regset_first(set); tcg_reg_free(s, reg, allocated_regs); return reg; + } else { + for (i = 0; i < n; i++) { + TCGReg reg = order[i]; + if (tcg_regset_test_reg(set, reg)) { + tcg_reg_free(s, reg, allocated_regs); + return reg; + } + } } } @@ -2840,7 +3226,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs, /* Make sure the temporary is in a register. If needed, allocate the register from DESIRED while avoiding ALLOCATED. */ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, - TCGRegSet allocated_regs) + TCGRegSet allocated_regs, TCGRegSet preferred_regs) { TCGReg reg; @@ -2848,12 +3234,14 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, case TEMP_VAL_REG: return; case TEMP_VAL_CONST: - reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); + reg = tcg_reg_alloc(s, desired_regs, allocated_regs, + preferred_regs, ts->indirect_base); tcg_out_movi(s, ts->type, reg, ts->val); ts->mem_coherent = 0; break; case TEMP_VAL_MEM: - reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base); + reg = tcg_reg_alloc(s, desired_regs, allocated_regs, + preferred_regs, ts->indirect_base); tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); ts->mem_coherent = 1; break; @@ -2922,14 +3310,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) save_globals(s, allocated_regs); } +/* + * Specialized code generation for INDEX_op_movi_*. + */ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, - tcg_target_ulong val, TCGLifeData arg_life) + tcg_target_ulong val, TCGLifeData arg_life, + TCGRegSet preferred_regs) { - if (ots->fixed_reg) { - /* For fixed registers, we do not do any constant propagation. */ - tcg_out_movi(s, ots->type, ots->reg, val); - return; - } + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); /* The movi is not explicitly generated here. 
*/ if (ots->val_type == TEMP_VAL_REG) { @@ -2939,7 +3328,7 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, ots->val = val; ots->mem_coherent = 0; if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0)); + temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0)); } else if (IS_DEAD_ARG(0)) { temp_dead(s, ots); } @@ -2950,20 +3339,27 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op) TCGTemp *ots = arg_temp(op->args[0]); tcg_target_ulong val = op->args[1]; - tcg_reg_alloc_do_movi(s, ots, val, op->life); + tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]); } +/* + * Specialized code generation for INDEX_op_mov_*. + */ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; - TCGRegSet allocated_regs; + TCGRegSet allocated_regs, preferred_regs; TCGTemp *ts, *ots; TCGType otype, itype; allocated_regs = s->reserved_regs; + preferred_regs = op->output_pref[0]; ots = arg_temp(op->args[0]); ts = arg_temp(op->args[1]); + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); + /* Note that otype != itype for no-op truncation. */ otype = ots->type; itype = ts->type; @@ -2974,7 +3370,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) if (IS_DEAD_ARG(1)) { temp_dead(s, ts); } - tcg_reg_alloc_do_movi(s, ots, val, arg_life); + tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs); return; } @@ -2983,11 +3379,12 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) the SOURCE value into its own register first, that way we don't have to reload SOURCE the next time it is used. */ if (ts->val_type == TEMP_VAL_MEM) { - temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); + temp_load(s, ts, tcg_target_available_regs[itype], + allocated_regs, preferred_regs); } tcg_debug_assert(ts->val_type == TEMP_VAL_REG); - if (IS_DEAD_ARG(0) && !ots->fixed_reg) { + if (IS_DEAD_ARG(0)) { /* mov to a non-saved dead register makes no sense (even with liveness analysis disabled). */ tcg_debug_assert(NEED_SYNC_ARG(0)); @@ -3000,7 +3397,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) } temp_dead(s, ots); } else { - if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { + if (IS_DEAD_ARG(1) && !ts->fixed_reg) { /* the mov can be suppressed */ if (ots->val_type == TEMP_VAL_REG) { s->reg_to_temp[ots->reg] = NULL; @@ -3013,19 +3410,147 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) input one. */ tcg_regset_set_reg(allocated_regs, ts->reg); ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], - allocated_regs, ots->indirect_base); + allocated_regs, preferred_regs, + ots->indirect_base); + } + if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { + /* + * Cross register class move not supported. + * Store the source register into the destination slot + * and leave the destination temp as TEMP_VAL_MEM. + */ + assert(!ots->fixed_reg); + if (!ts->mem_allocated) { + temp_allocate_frame(s, ots); + } + tcg_out_st(s, ts->type, ts->reg, + ots->mem_base->reg, ots->mem_offset); + ots->mem_coherent = 1; + temp_free_or_dead(s, ots, -1); + return; } - tcg_out_mov(s, otype, ots->reg, ts->reg); } ots->val_type = TEMP_VAL_REG; ots->mem_coherent = 0; s->reg_to_temp[ots->reg] = ots; if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, allocated_regs, 0); + temp_sync(s, ots, allocated_regs, 0, 0); } } } +/* + * Specialized code generation for INDEX_op_dup_vec. 
+ */ +static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) +{ + const TCGLifeData arg_life = op->life; + TCGRegSet dup_out_regs, dup_in_regs; + TCGTemp *its, *ots; + TCGType itype, vtype; + intptr_t endian_fixup; + unsigned vece; + bool ok; + + ots = arg_temp(op->args[0]); + its = arg_temp(op->args[1]); + + /* ENV should not be modified. */ + tcg_debug_assert(!ots->fixed_reg); + + itype = its->type; + vece = TCGOP_VECE(op); + vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + + if (its->val_type == TEMP_VAL_CONST) { + /* Propagate constant via movi -> dupi. */ + tcg_target_ulong val = its->val; + if (IS_DEAD_ARG(1)) { + temp_dead(s, its); + } + tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); + return; + } + + dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs; + dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs; + + /* Allocate the output register now. */ + if (ots->val_type != TEMP_VAL_REG) { + TCGRegSet allocated_regs = s->reserved_regs; + + if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { + /* Make sure to not spill the input register. */ + tcg_regset_set_reg(allocated_regs, its->reg); + } + ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, + op->output_pref[0], ots->indirect_base); + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; + s->reg_to_temp[ots->reg] = ots; + } + + switch (its->val_type) { + case TEMP_VAL_REG: + /* + * The dup constriaints must be broad, covering all possible VECE. + * However, tcg_op_dup_vec() gets to see the VECE and we allow it + * to fail, indicating that extra moves are required for that case. + */ + if (tcg_regset_test_reg(dup_in_regs, its->reg)) { + if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { + goto done; + } + /* Try again from memory or a vector input register. */ + } + if (!its->mem_coherent) { + /* + * The input register is not synced, and so an extra store + * would be required to use memory. Attempt an integer-vector + * register move first. We do not have a TCGRegSet for this. + */ + if (tcg_out_mov(s, itype, ots->reg, its->reg)) { + break; + } + /* Sync the temp back to its slot and load from there. */ + temp_sync(s, its, s->reserved_regs, 0, 0); + } + /* fall through */ + + case TEMP_VAL_MEM: +#ifdef HOST_WORDS_BIGENDIAN + endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; + endian_fixup -= 1 << vece; +#else + endian_fixup = 0; +#endif + if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, + its->mem_offset + endian_fixup)) { + goto done; + } + tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); + break; + + default: + g_assert_not_reached(); + } + + /* We now have a vector input register, so dup must succeed. 
*/ + ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); + tcg_debug_assert(ok); + + done: + if (IS_DEAD_ARG(1)) { + temp_dead(s, its); + } + if (NEED_SYNC_ARG(0)) { + temp_sync(s, ots, s->reserved_regs, 0, 0); + } + if (IS_DEAD_ARG(0)) { + temp_dead(s, ots); + } +} + static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; @@ -3053,6 +3578,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { + TCGRegSet i_preferred_regs, o_preferred_regs; + i = def->sorted_args[nb_oargs + k]; arg = op->args[i]; arg_ct = &def->args_ct[i]; @@ -3063,17 +3590,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* constant is OK for instruction */ const_args[i] = 1; new_args[i] = ts->val; - goto iarg_end; + continue; } - temp_load(s, ts, arg_ct->u.regs, i_allocated_regs); - + i_preferred_regs = o_preferred_regs = 0; if (arg_ct->ct & TCG_CT_IALIAS) { + o_preferred_regs = op->output_pref[arg_ct->alias_index]; if (ts->fixed_reg) { /* if fixed register, we must allocate a new register if the alias is not the same register */ - if (arg != op->args[arg_ct->alias_index]) + if (arg != op->args[arg_ct->alias_index]) { goto allocate_in_reg; + } } else { /* if the input is aliased to an output and if it is not dead after the instruction, we must allocate @@ -3081,33 +3609,50 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) if (!IS_DEAD_ARG(i)) { goto allocate_in_reg; } + /* check if the current register has already been allocated for another input aliased to an output */ - int k2, i2; - for (k2 = 0 ; k2 < k ; k2++) { - i2 = def->sorted_args[nb_oargs + k2]; - if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && - (new_args[i2] == ts->reg)) { - goto allocate_in_reg; + if (ts->val_type == TEMP_VAL_REG) { + int k2, i2; + reg = ts->reg; + for (k2 = 0 ; k2 < k ; k2++) { + i2 = def->sorted_args[nb_oargs + k2]; + if ((def->args_ct[i2].ct & TCG_CT_IALIAS) && + reg == new_args[i2]) { + goto allocate_in_reg; + } } } + i_preferred_regs = o_preferred_regs; } } + + temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs); reg = ts->reg; + if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { /* nothing to do : the constraint is satisfied */ } else { allocate_in_reg: /* allocate a new register matching the constraint and move the temporary register into it */ + temp_load(s, ts, tcg_target_available_regs[ts->type], + i_allocated_regs, 0); reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs, - ts->indirect_base); - tcg_out_mov(s, ts->type, reg, ts->reg); + o_preferred_regs, ts->indirect_base); + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { + /* + * Cross register class move not supported. Sync the + * temp back to its slot and load from there. + */ + temp_sync(s, ts, i_allocated_regs, 0, 0); + tcg_out_ld(s, ts->type, reg, + ts->mem_base->reg, ts->mem_offset); + } } new_args[i] = reg; const_args[i] = 0; tcg_regset_set_reg(i_allocated_regs, reg); - iarg_end: ; } /* mark dead temporaries and free the associated registers */ @@ -3140,37 +3685,33 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) arg = op->args[i]; arg_ct = &def->args_ct[i]; ts = arg_temp(arg); + + /* ENV should not be modified. 
*/ + tcg_debug_assert(!ts->fixed_reg); + if ((arg_ct->ct & TCG_CT_ALIAS) && !const_args[arg_ct->alias_index]) { reg = new_args[arg_ct->alias_index]; } else if (arg_ct->ct & TCG_CT_NEWREG) { reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs | o_allocated_regs, - ts->indirect_base); + op->output_pref[k], ts->indirect_base); } else { - /* if fixed register, we try to use it */ - reg = ts->reg; - if (ts->fixed_reg && - tcg_regset_test_reg(arg_ct->u.regs, reg)) { - goto oarg_end; - } reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs, - ts->indirect_base); + op->output_pref[k], ts->indirect_base); } tcg_regset_set_reg(o_allocated_regs, reg); - /* if a fixed register is used, then a move will be done afterwards */ - if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - /* temp value is modified, so the value kept in memory is - potentially not the same */ - ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; - } - oarg_end: + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = NULL; + } + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* + * Temp value is modified, so the value kept in memory is + * potentially not the same. + */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = ts; new_args[i] = reg; } } @@ -3186,12 +3727,12 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* move the outputs in the correct register if needed */ for(i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); - reg = new_args[i]; - if (ts->fixed_reg && ts->reg != reg) { - tcg_out_mov(s, ts->type, ts->reg, reg); - } + + /* ENV should not be modified. */ + tcg_debug_assert(!ts->fixed_reg); + if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i)); + temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i)); } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } @@ -3247,7 +3788,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) if (arg != TCG_CALL_DUMMY_ARG) { ts = arg_temp(arg); temp_load(s, ts, tcg_target_available_regs[ts->type], - s->reserved_regs); + s->reserved_regs, 0); tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); } #ifndef TCG_TARGET_STACK_GROWSUP @@ -3262,17 +3803,26 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) if (arg != TCG_CALL_DUMMY_ARG) { ts = arg_temp(arg); reg = tcg_target_call_iarg_regs[i]; - tcg_reg_free(s, reg, allocated_regs); if (ts->val_type == TEMP_VAL_REG) { if (ts->reg != reg) { - tcg_out_mov(s, ts->type, reg, ts->reg); + tcg_reg_free(s, reg, allocated_regs); + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { + /* + * Cross register class move not supported. Sync the + * temp back to its slot and load from there. + */ + temp_sync(s, ts, allocated_regs, 0, 0); + tcg_out_ld(s, ts->type, reg, + ts->mem_base->reg, ts->mem_offset); + } } } else { TCGRegSet arg_set = 0; + tcg_reg_free(s, reg, allocated_regs); tcg_regset_set_reg(arg_set, reg); - temp_load(s, ts, arg_set, allocated_regs); + temp_load(s, ts, arg_set, allocated_regs, 0); } tcg_regset_set_reg(allocated_regs, reg); @@ -3309,26 +3859,23 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) for(i = 0; i < nb_oargs; i++) { arg = op->args[i]; ts = arg_temp(arg); + + /* ENV should not be modified. 
*/ + tcg_debug_assert(!ts->fixed_reg); + reg = tcg_target_call_oarg_regs[i]; tcg_debug_assert(s->reg_to_temp[reg] == NULL); - - if (ts->fixed_reg) { - if (ts->reg != reg) { - tcg_out_mov(s, ts->type, ts->reg, reg); - } - } else { - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; - if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i)); - } else if (IS_DEAD_ARG(i)) { - temp_dead(s, ts); - } + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = NULL; + } + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = ts; + if (NEED_SYNC_ARG(i)) { + temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); + } else if (IS_DEAD_ARG(i)) { + temp_dead(s, ts); } } } @@ -3361,6 +3908,7 @@ void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) const TCGProfile *orig = &s->prof; if (counters) { + PROF_ADD(prof, orig, cpu_exec_time); PROF_ADD(prof, orig, tb_count1); PROF_ADD(prof, orig, tb_count); PROF_ADD(prof, orig, op_count); @@ -3401,21 +3949,42 @@ static void tcg_profile_snapshot_table(TCGProfile *prof) tcg_profile_snapshot(prof, false, true); } -void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) +void tcg_dump_op_count(void) { TCGProfile prof = {}; int i; tcg_profile_snapshot_table(&prof); for (i = 0; i < NB_OPS; i++) { - cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, + qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, prof.table_op_count[i]); } } + +int64_t tcg_cpu_exec_time(void) +{ + unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); + unsigned int i; + int64_t ret = 0; + + for (i = 0; i < n_ctxs; i++) { + const TCGContext *s = atomic_read(&tcg_ctxs[i]); + const TCGProfile *prof = &s->prof; + + ret += atomic_read(&prof->cpu_exec_time); + } + return ret; +} #else -void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf) +void tcg_dump_op_count(void) { - cpu_fprintf(f, "[TCG profiler not compiled]\n"); + qemu_printf("[TCG profiler not compiled]\n"); +} + +int64_t tcg_cpu_exec_time(void) +{ + error_report("%s: TCG profiler not compiled", __func__); + exit(EXIT_FAILURE); } #endif @@ -3453,12 +4022,29 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, false); qemu_log("\n"); qemu_log_unlock(); } #endif +#ifdef CONFIG_DEBUG_TCG + /* Ensure all labels referenced have been emitted. 
*/ + { + TCGLabel *l; + bool error = false; + + QSIMPLEQ_FOREACH(l, &s->labels, next) { + if (unlikely(!l->present) && l->refs) { + qemu_log_mask(CPU_LOG_TB_OP, + "$L%d referenced but not present.\n", l->id); + error = true; + } + } + assert(!error); + } +#endif + #ifdef CONFIG_PROFILER atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); #endif @@ -3472,6 +4058,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) atomic_set(&prof->la_time, prof->la_time - profile_getclock()); #endif + reachable_code_pass(s); liveness_pass_1(s); if (s->nb_indirects > 0) { @@ -3480,7 +4067,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP before indirect lowering:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, false); qemu_log("\n"); qemu_log_unlock(); } @@ -3501,7 +4088,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) && qemu_log_in_addr_range(tb->pc))) { qemu_log_lock(); qemu_log("OP after optimization and liveness analysis:\n"); - tcg_dump_ops(s); + tcg_dump_ops(s, true); qemu_log("\n"); qemu_log_unlock(); } @@ -3538,6 +4125,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) case INDEX_op_dupi_vec: tcg_reg_alloc_movi(s, op); break; + case INDEX_op_dup_vec: + tcg_reg_alloc_dup(s, op); + break; case INDEX_op_insn_start: if (num_insns >= 0) { size_t off = tcg_current_code_size(s); @@ -3585,21 +4175,30 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { return -1; } + /* Test for TB overflow, as seen by gen_insn_end_off. */ + if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) { + return -2; + } } tcg_debug_assert(num_insns >= 0); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); /* Generate TB finalization at the end of block */ #ifdef TCG_TARGET_NEED_LDST_LABELS - if (!tcg_out_ldst_finalize(s)) { - return -1; + i = tcg_out_ldst_finalize(s); + if (i < 0) { + return i; } #endif #ifdef TCG_TARGET_NEED_POOL_LABELS - if (!tcg_out_pool_finalize(s)) { - return -1; + i = tcg_out_pool_finalize(s); + if (i < 0) { + return i; } #endif + if (!tcg_resolve_relocs(s)) { + return -2; + } /* flush instruction cache */ flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); @@ -3608,7 +4207,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } #ifdef CONFIG_PROFILER -void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) +void tcg_dump_info(void) { TCGProfile prof = {}; const TCGProfile *s; @@ -3622,52 +4221,53 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) tb_div_count = tb_count ? tb_count : 1; tot = s->interm_time + s->code_time; - cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", + qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", tot, tot / 2.4e9); - cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", + qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64 + " %0.1f%%)\n", tb_count, s->tb_count1 - tb_count, (double)(s->tb_count1 - s->tb_count) / (s->tb_count1 ? 
s->tb_count1 : 1) * 100.0); - cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", + qemu_printf("avg ops/TB %0.1f max=%d\n", (double)s->op_count / tb_div_count, s->op_count_max); - cpu_fprintf(f, "deleted ops/TB %0.2f\n", + qemu_printf("deleted ops/TB %0.2f\n", (double)s->del_op_count / tb_div_count); - cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", + qemu_printf("avg temps/TB %0.2f max=%d\n", (double)s->temp_count / tb_div_count, s->temp_count_max); - cpu_fprintf(f, "avg host code/TB %0.1f\n", + qemu_printf("avg host code/TB %0.1f\n", (double)s->code_out_len / tb_div_count); - cpu_fprintf(f, "avg search data/TB %0.1f\n", + qemu_printf("avg search data/TB %0.1f\n", (double)s->search_out_len / tb_div_count); - cpu_fprintf(f, "cycles/op %0.1f\n", + qemu_printf("cycles/op %0.1f\n", s->op_count ? (double)tot / s->op_count : 0); - cpu_fprintf(f, "cycles/in byte %0.1f\n", + qemu_printf("cycles/in byte %0.1f\n", s->code_in_len ? (double)tot / s->code_in_len : 0); - cpu_fprintf(f, "cycles/out byte %0.1f\n", + qemu_printf("cycles/out byte %0.1f\n", s->code_out_len ? (double)tot / s->code_out_len : 0); - cpu_fprintf(f, "cycles/search byte %0.1f\n", + qemu_printf("cycles/search byte %0.1f\n", s->search_out_len ? (double)tot / s->search_out_len : 0); if (tot == 0) { tot = 1; } - cpu_fprintf(f, " gen_interm time %0.1f%%\n", + qemu_printf(" gen_interm time %0.1f%%\n", (double)s->interm_time / tot * 100.0); - cpu_fprintf(f, " gen_code time %0.1f%%\n", + qemu_printf(" gen_code time %0.1f%%\n", (double)s->code_time / tot * 100.0); - cpu_fprintf(f, "optim./code time %0.1f%%\n", + qemu_printf("optim./code time %0.1f%%\n", (double)s->opt_time / (s->code_time ? s->code_time : 1) * 100.0); - cpu_fprintf(f, "liveness/code time %0.1f%%\n", + qemu_printf("liveness/code time %0.1f%%\n", (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); - cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", + qemu_printf("cpu_restore count %" PRId64 "\n", s->restore_count); - cpu_fprintf(f, " avg cycles %0.1f\n", + qemu_printf(" avg cycles %0.1f\n", s->restore_count ? (double)s->restore_time / s->restore_count : 0); } #else -void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) +void tcg_dump_info(void) { - cpu_fprintf(f, "[TCG profiler not compiled]\n"); + qemu_printf("[TCG profiler not compiled]\n"); } #endif
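
Two condensed sketches of the larger mechanisms in this diff follow. Both are
approximations under stated assumptions rather than excerpts, and every
identifier in them (Reloc, Label, patch_branch, queue_reloc, resolve_relocs,
NB_REGS, RegSet, alloc_order, reg_is_free, spill, reg_alloc_sketch) is a
stand-in invented for illustration, not a TCG interface.

First, the relocation rework: tcg_out_reloc() no longer patches eagerly when
a label already has a value; every use is queued on its label, and
tcg_resolve_relocs() walks all labels once after code generation. Since
patch_reloc() now returns bool, an unencodable displacement can fail the
whole translation block, which the caller then retries (the new -2 return
from tcg_gen_code()).

/*
 * Deferred relocation handling, assuming a single branch format with a
 * 16-bit displacement field.  All types and helpers here are stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct Reloc {
    struct Reloc *next;
    uint32_t *ptr;                  /* the instruction to patch */
    intptr_t addend;
} Reloc;

typedef struct Label {
    struct Label *next;
    Reloc *relocs;                  /* every branch targeting this label */
    uintptr_t value;                /* code address, set when emitted */
} Label;

/* Analogue of patch_reloc(): false if the displacement does not encode. */
static bool patch_branch(uint32_t *insn, uintptr_t target, intptr_t addend)
{
    intptr_t disp = (intptr_t)(target + addend) - (intptr_t)insn;

    if (disp != (int16_t)disp) {
        return false;               /* out of range: abort and retry the TB */
    }
    *insn = (*insn & ~0xffffu) | ((uint32_t)disp & 0xffffu);
    return true;
}

/* Emission side: record the use, never patch in place. */
static void queue_reloc(Label *l, Reloc *r, uint32_t *insn, intptr_t addend)
{
    r->ptr = insn;
    r->addend = addend;
    r->next = l->relocs;
    l->relocs = r;
}

/* After codegen: one pass over all labels resolves every branch. */
static bool resolve_relocs(const Label *labels)
{
    for (const Label *l = labels; l; l = l->next) {
        for (const Reloc *r = l->relocs; r; r = r->next) {
            if (!patch_branch(r->ptr, l->value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

Second, the register-allocation preferences: liveness_pass_1 now records,
per output argument, the set of registers the following uses would like
(op->output_pref[]), and tcg_reg_alloc() searches that subset before the
full constraint set, spilling only as a last resort.

/*
 * Two-tier register search in the spirit of the new tcg_reg_alloc().
 * NB_REGS, alloc_order[], reg_is_free() and spill() are assumptions
 * made for the example.
 */
#include <stdbool.h>
#include <stdint.h>

enum { NB_REGS = 16 };
typedef uint32_t RegSet;

extern const int alloc_order[NB_REGS]; /* host's preferred scan order */
extern bool reg_is_free(int reg);
extern void spill(int reg);            /* sync the occupant, free the reg */

static int reg_alloc_sketch(RegSet required, RegSet allocated, RegSet preferred)
{
    RegSet tier[2];
    int first;

    tier[1] = required & ~allocated;   /* the result must lie in this set */
    tier[0] = tier[1] & preferred;     /* but this subset may save a move */

    /* Skip the preference tier if it is empty or changes nothing. */
    first = (tier[0] == 0 || tier[0] == tier[1]);

    /* Pass 1: free registers only, preferred tier first. */
    for (int t = first; t < 2; t++) {
        for (int i = 0; i < NB_REGS; i++) {
            int reg = alloc_order[i];
            if (((tier[t] >> reg) & 1) && reg_is_free(reg)) {
                return reg;
            }
        }
    }

    /* Pass 2: nothing was free; evict, again preferring the first tier. */
    for (int t = first; t < 2; t++) {
        for (int i = 0; i < NB_REGS; i++) {
            int reg = alloc_order[i];
            if ((tier[t] >> reg) & 1) {
                spill(reg);
                return reg;
            }
        }
    }
    return -1;                         /* unreachable while tier[1] != 0 */
}

The payoff of the preference plumbing shows up at call and mov sites: when a
temp's next use is as the i'th call argument, the call handling in
liveness_pass_1 together with la_cross_call() biases its preference toward
tcg_target_call_iarg_regs[i], so the value tends to be computed directly in
the register the ABI wants and the later mov disappears.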