#include "qemu/osdep.h"
#include "qemu/int128.h"
+#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
glue(glue(case INDEX_op_, x), _i64): \
glue(glue(case INDEX_op_, x), _vec)
+/*
+ * Tracking record for one known in-memory value: temp TS of type TYPE
+ * holds a copy of the bytes at [itree.start, itree.last], offsets
+ * relative to env (see the tcgv_ptr_arg(tcg_env) checks at the users).
+ * NEXT links the record onto its temp's mem_copy list while live, or
+ * onto the context's mem_free list once recycled.
+ */
+typedef struct MemCopyInfo {
+ IntervalTreeNode itree;
+ QSIMPLEQ_ENTRY (MemCopyInfo) next;
+ TCGTemp *ts;
+ TCGType type;
+} MemCopyInfo;
+
typedef struct TempOptInfo {
bool is_const;
TCGTemp *prev_copy;
TCGTemp *next_copy;
+ QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
TCGOp *prev_mb;
TCGTempSet temps_used;
+ IntervalTreeRoot mem_copy;
+ QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
+
/* In flight values from optimization. */
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
return ts_info(ts)->is_const;
}
+/* Return true if TS is known to be constant and equal to VAL. */
+static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
+{
+ TempOptInfo *ti = ts_info(ts);
+ return ti->is_const && ti->val == val;
+}
+
static inline bool arg_is_const(TCGArg arg)
{
return ts_is_const(arg_temp(arg));
}
-static inline bool ts_is_copy(TCGTemp *ts)
+static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
- return ts_info(ts)->next_copy != ts;
+ return ts_is_const_val(arg_temp(arg), val);
}
-/* Reset TEMP's state, possibly removing the temp for the list of copies. */
-static void reset_ts(TCGTemp *ts)
+static inline bool ts_is_copy(TCGTemp *ts)
{
- TempOptInfo *ti = ts_info(ts);
- TempOptInfo *pi = ts_info(ti->prev_copy);
- TempOptInfo *ni = ts_info(ti->next_copy);
-
- ni->prev_copy = ti->prev_copy;
- pi->next_copy = ti->next_copy;
- ti->next_copy = ts;
- ti->prev_copy = ts;
- ti->is_const = false;
- ti->z_mask = -1;
- ti->s_mask = 0;
+ return ts_info(ts)->next_copy != ts;
}
-static void reset_temp(TCGArg arg)
+static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
- reset_ts(arg_temp(arg));
+ return a->kind < b->kind ? b : a;
}
/* Initialize and activate a temporary. */
ti->next_copy = ts;
ti->prev_copy = ts;
+ QSIMPLEQ_INIT(&ti->mem_copy);
if (ts->kind == TEMP_CONST) {
ti->is_const = true;
ti->val = ts->val;
}
}
-static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
+static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
- TCGTemp *i, *g, *l;
+ IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
+ return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+/*
+ * Return the next memory-copy record after MEM that overlaps the
+ * offset range [s, l], or NULL when the iteration is exhausted.
+ */
+static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
+{
+ IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
+ return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+/*
+ * Deactivate memory-copy record MC: unlink it from the interval tree
+ * and from its owning temp's list, then park it on the context's
+ * free list so record_mem_copy() can recycle the allocation.
+ */
+static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
+{
+ TCGTemp *ts = mc->ts;
+ TempOptInfo *ti = ts_info(ts);
+
+ interval_tree_remove(&mc->itree, &ctx->mem_copy);
+ QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
+ QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
+}
+
+/* Remove every memory-copy record overlapping the offset range [s, l]. */
+static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
+{
+ while (true) {
+ MemCopyInfo *mc = mem_copy_first(ctx, s, l);
+ if (!mc) {
+ break;
+ }
+ remove_mem_copy(ctx, mc);
+ }
+}
+
+/*
+ * Forget all tracked memory values (used at BB ends and around calls
+ * with side effects).  The range 0..-1 is intended to cover every
+ * tracked offset; the assert confirms the tree really is empty after.
+ */
+static void remove_mem_copy_all(OptContext *ctx)
+{
+ remove_mem_copy_in(ctx, 0, -1);
+ tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
+}
+
+static TCGTemp *find_better_copy(TCGTemp *ts)
+{
+ TCGTemp *i, *ret;
/* If this is already readonly, we can't do better. */
if (temp_readonly(ts)) {
return ts;
}
- g = l = NULL;
+ ret = ts;
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
- if (temp_readonly(i)) {
- return i;
- } else if (i->kind > ts->kind) {
- if (i->kind == TEMP_GLOBAL) {
- g = i;
- } else if (i->kind == TEMP_TB) {
- l = i;
+ ret = cmp_better_copy(ret, i);
+ }
+ return ret;
+}
+
+/*
+ * Transfer ownership of all memory-copy records from SRC_TS to DST_TS,
+ * when DST_TS is the preferred representative (see cmp_better_copy).
+ */
+static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
+{
+ TempOptInfo *si = ts_info(src_ts);
+ TempOptInfo *di = ts_info(dst_ts);
+ MemCopyInfo *mc;
+
+ QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
+ tcg_debug_assert(mc->ts == src_ts);
+ mc->ts = dst_ts;
+ }
+ QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
+}
+
+/* Reset TEMP's state, possibly removing the temp from the list of copies. */
+static void reset_ts(OptContext *ctx, TCGTemp *ts)
+{
+ TempOptInfo *ti = ts_info(ts);
+ TCGTemp *pts = ti->prev_copy;
+ TCGTemp *nts = ti->next_copy;
+ TempOptInfo *pi = ts_info(pts);
+ TempOptInfo *ni = ts_info(nts);
+
+ ni->prev_copy = ti->prev_copy;
+ pi->next_copy = ti->next_copy;
+ ti->next_copy = ts;
+ ti->prev_copy = ts;
+ ti->is_const = false;
+ ti->z_mask = -1;
+ ti->s_mask = 0;
+
+ if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
+ if (ts == nts) {
+ /* Last temp copy being removed, the mem copies die. */
+ MemCopyInfo *mc;
+ QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
+ interval_tree_remove(&mc->itree, &ctx->mem_copy);
}
+ QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
+ } else {
+ move_mem_copies(find_better_copy(nts), ts);
}
}
+}
- /* If we didn't find a better representation, return the same temp. */
- return g ? g : l ? l : ts;
+static void reset_temp(OptContext *ctx, TCGArg arg)
+{
+ reset_ts(ctx, arg_temp(arg));
+}
+
+/*
+ * Record that temp TS (type TYPE) now holds a copy of the memory at
+ * offsets [start, last].  Any stale record for the same range must
+ * already have been removed by the caller (see fold_tcg_st_memcopy).
+ * Recycles a node from the free list when possible.
+ */
+static void record_mem_copy(OptContext *ctx, TCGType type,
+ TCGTemp *ts, intptr_t start, intptr_t last)
+{
+ MemCopyInfo *mc;
+ TempOptInfo *ti;
+
+ /* Reuse a previously freed record before allocating a new one. */
+ mc = QSIMPLEQ_FIRST(&ctx->mem_free);
+ if (mc) {
+ QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
+ } else {
+ mc = tcg_malloc(sizeof(*mc));
+ }
+
+ memset(mc, 0, sizeof(*mc));
+ mc->itree.start = start;
+ mc->itree.last = last;
+ mc->type = type;
+ interval_tree_insert(&mc->itree, &ctx->mem_copy);
+
+ /* Attach the record to the best representative of TS's copy set. */
+ ts = find_better_copy(ts);
+ ti = ts_info(ts);
+ mc->ts = ts;
+ QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
 }
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}
+/*
+ * Return a temp known to hold a TYPE-sized copy of the memory at
+ * offset S, or NULL.  Requires an exact match on start offset and
+ * type; overlapping records of other shapes are not usable.
+ */
+static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
+{
+ MemCopyInfo *mc;
+
+ for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
+ if (mc->itree.start == s && mc->type == type) {
+ return find_better_copy(mc->ts);
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Return a TCGArg for a constant VAL of the current fold type,
+ * with its optimizer info initialized.  For 32-bit type the value
+ * is sign-extended so that constant temps are canonical.
+ */
+static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
+{
+ TCGType type = ctx->type;
+ TCGTemp *ts;
+
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ ts = tcg_constant_internal(type, val);
+ init_ts_info(ctx, ts);
+
+ return temp_arg(ts);
+}
+
+/*
+ * Return a fresh EBB-lifetime temp of the current fold type, with
+ * optimizer info initialized, for use in expansions inserted here.
+ */
+static TCGArg arg_new_temp(OptContext *ctx)
+{
+ TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
+ init_ts_info(ctx, ts);
+ return temp_arg(ts);
+}
+
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
TCGTemp *dst_ts = arg_temp(dst);
return true;
}
- reset_ts(dst_ts);
+ reset_ts(ctx, dst_ts);
di = ts_info(dst_ts);
si = ts_info(src_ts);
si->next_copy = dst_ts;
di->is_const = si->is_const;
di->val = si->val;
+
+ if (!QSIMPLEQ_EMPTY(&si->mem_copy)
+ && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
+ move_mem_copies(dst_ts, src_ts);
+ }
}
return true;
}
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
TCGArg dst, uint64_t val)
{
- TCGTemp *tv;
-
- if (ctx->type == TCG_TYPE_I32) {
- val = (int32_t)val;
- }
-
/* Convert movi to mov with constant temp. */
- tv = tcg_constant_internal(ctx->type, val);
- init_ts_info(ctx, tv);
- return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
+ return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
return x <= y;
case TCG_COND_GTU:
return x > y;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ return (x & y) == 0;
+ case TCG_COND_TSTNE:
+ return (x & y) != 0;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
return x <= y;
case TCG_COND_GTU:
return x > y;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ return (x & y) == 0;
+ case TCG_COND_TSTNE:
+ return (x & y) != 0;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
-static bool do_constant_folding_cond_eq(TCGCond c)
+static int do_constant_folding_cond_eq(TCGCond c)
{
switch (c) {
case TCG_COND_GT:
case TCG_COND_LEU:
case TCG_COND_EQ:
return 1;
- default:
- g_assert_not_reached();
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ return -1;
+ case TCG_COND_ALWAYS:
+ case TCG_COND_NEVER:
+ break;
}
+ g_assert_not_reached();
}
/*
}
} else if (args_are_copies(x, y)) {
return do_constant_folding_cond_eq(c);
- } else if (arg_is_const(y) && arg_info(y)->val == 0) {
+ } else if (arg_is_const_val(y, 0)) {
switch (c) {
case TCG_COND_LTU:
+ case TCG_COND_TSTNE:
return 0;
case TCG_COND_GEU:
+ case TCG_COND_TSTEQ:
return 1;
default:
return -1;
return -1;
}
-/*
- * Return -1 if the condition can't be simplified,
- * and the result of the condition (0 or 1) if it can.
- */
-static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
-{
- TCGArg al = p1[0], ah = p1[1];
- TCGArg bl = p2[0], bh = p2[1];
-
- if (arg_is_const(bl) && arg_is_const(bh)) {
- tcg_target_ulong blv = arg_info(bl)->val;
- tcg_target_ulong bhv = arg_info(bh)->val;
- uint64_t b = deposit64(blv, 32, 32, bhv);
-
- if (arg_is_const(al) && arg_is_const(ah)) {
- tcg_target_ulong alv = arg_info(al)->val;
- tcg_target_ulong ahv = arg_info(ah)->val;
- uint64_t a = deposit64(alv, 32, 32, ahv);
- return do_constant_folding_cond_64(a, b, c);
- }
- if (b == 0) {
- switch (c) {
- case TCG_COND_LTU:
- return 0;
- case TCG_COND_GEU:
- return 1;
- default:
- break;
- }
- }
- }
- if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
- return do_constant_folding_cond_eq(c);
- }
- return -1;
-}
-
/**
* swap_commutative:
* @dest: TCGArg of the destination argument, or NO_DEST.
return false;
}
+/*
+ * Return -1 if the condition can't be simplified,
+ * and the result of the condition (0 or 1) if it can.
+ *
+ * P1, P2 and PCOND point into the op's argument array and may be
+ * rewritten in place even when -1 is returned: operands may be
+ * swapped to canonical order, and TSTEQ/TSTNE may be converted to
+ * EQ/NE or LT/GE, or expanded via an explicit AND when the backend
+ * has no native test-condition support.
+ */
+static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
+ TCGArg *p1, TCGArg *p2, TCGArg *pcond)
+{
+ TCGCond cond;
+ bool swap;
+ int r;
+
+ swap = swap_commutative(dest, p1, p2);
+ cond = *pcond;
+ if (swap) {
+ *pcond = cond = tcg_swap_cond(cond);
+ }
+
+ r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
+ if (r >= 0) {
+ return r;
+ }
+ if (!is_tst_cond(cond)) {
+ return -1;
+ }
+
+ /*
+ * TSTNE x,x -> NE x,0
+ * TSTNE x,-1 -> NE x,0
+ */
+ if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) {
+ *p2 = arg_new_constant(ctx, 0);
+ *pcond = tcg_tst_eqne_cond(cond);
+ return -1;
+ }
+
+ /* TSTNE x,sign -> LT x,0 */
+ if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
+ ? INT32_MIN : INT64_MIN))) {
+ *p2 = arg_new_constant(ctx, 0);
+ *pcond = tcg_tst_ltge_cond(cond);
+ return -1;
+ }
+
+ /* Expand to AND with a temporary if no backend support. */
+ if (!TCG_TARGET_HAS_tst) {
+ TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32
+ ? INDEX_op_and_i32 : INDEX_op_and_i64);
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3);
+ TCGArg tmp = arg_new_temp(ctx);
+
+ op2->args[0] = tmp;
+ op2->args[1] = *p1;
+ op2->args[2] = *p2;
+
+ *p1 = tmp;
+ *p2 = arg_new_constant(ctx, 0);
+ *pcond = tcg_tst_eqne_cond(cond);
+ }
+ return -1;
+}
+
+/*
+ * Return -1 if the double-word condition can't be simplified,
+ * and the result of the condition (0 or 1) if it can.
+ *
+ * ARGS points at the op's five arguments {al, ah, bl, bh, cond};
+ * they may be canonicalized in place even when -1 is returned,
+ * mirroring do_constant_folding_cond1 for the 32x2 case.
+ */
+static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
+{
+ TCGArg al, ah, bl, bh;
+ TCGCond c;
+ bool swap;
+ int r;
+
+ swap = swap_commutative2(args, args + 2);
+ c = args[4];
+ if (swap) {
+ args[4] = c = tcg_swap_cond(c);
+ }
+
+ al = args[0];
+ ah = args[1];
+ bl = args[2];
+ bh = args[3];
+
+ if (arg_is_const(bl) && arg_is_const(bh)) {
+ tcg_target_ulong blv = arg_info(bl)->val;
+ tcg_target_ulong bhv = arg_info(bh)->val;
+ uint64_t b = deposit64(blv, 32, 32, bhv);
+
+ if (arg_is_const(al) && arg_is_const(ah)) {
+ tcg_target_ulong alv = arg_info(al)->val;
+ tcg_target_ulong ahv = arg_info(ah)->val;
+ uint64_t a = deposit64(alv, 32, 32, ahv);
+
+ /* Both sides constant: fold completely. */
+ r = do_constant_folding_cond_64(a, b, c);
+ if (r >= 0) {
+ return r;
+ }
+ }
+
+ /* Comparisons against zero with a known answer. */
+ if (b == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ case TCG_COND_TSTNE:
+ return 0;
+ case TCG_COND_GEU:
+ case TCG_COND_TSTEQ:
+ return 1;
+ default:
+ break;
+ }
+ }
+
+ /* TSTNE x,-1 -> NE x,0 */
+ if (b == -1 && is_tst_cond(c)) {
+ args[3] = args[2] = arg_new_constant(ctx, 0);
+ args[4] = tcg_tst_eqne_cond(c);
+ return -1;
+ }
+
+ /* TSTNE x,sign -> LT x,0 */
+ if (b == INT64_MIN && is_tst_cond(c)) {
+ /* bl must be 0, so copy that to bh */
+ args[3] = bl;
+ args[4] = tcg_tst_ltge_cond(c);
+ return -1;
+ }
+ }
+
+ if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
+ r = do_constant_folding_cond_eq(c);
+ if (r >= 0) {
+ return r;
+ }
+
+ /* TSTNE x,x -> NE x,0 */
+ if (is_tst_cond(c)) {
+ args[3] = args[2] = arg_new_constant(ctx, 0);
+ args[4] = tcg_tst_eqne_cond(c);
+ return -1;
+ }
+ }
+
+ /* Expand to AND with a temporary if no backend support. */
+ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
+ TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
+ TCGArg t1 = arg_new_temp(ctx);
+ TCGArg t2 = arg_new_temp(ctx);
+
+ op1->args[0] = t1;
+ op1->args[1] = al;
+ op1->args[2] = bl;
+ op2->args[0] = t2;
+ op2->args[1] = ah;
+ op2->args[2] = bh;
+
+ args[0] = t1;
+ args[1] = t2;
+ args[3] = args[2] = arg_new_constant(ctx, 0);
+ args[4] = tcg_tst_eqne_cond(c);
+ }
+ return -1;
+}
+
static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
for (int i = 0; i < nb_args; i++) {
static void copy_propagate(OptContext *ctx, TCGOp *op,
int nb_oargs, int nb_iargs)
{
- TCGContext *s = ctx->tcg;
-
for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
if (ts_is_copy(ts)) {
- op->args[i] = temp_arg(find_better_copy(s, ts));
+ op->args[i] = temp_arg(find_better_copy(ts));
}
}
}
int i, nb_oargs;
/*
- * For an opcode that ends a BB, reset all temp data.
- * We do no cross-BB optimization.
+ * We only optimize extended basic blocks. If the opcode ends a BB
+ * and is not a conditional branch, reset all temp data.
*/
if (def->flags & TCG_OPF_BB_END) {
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
ctx->prev_mb = NULL;
+ if (!(def->flags & TCG_OPF_COND_BRANCH)) {
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ remove_mem_copy_all(ctx);
+ }
return;
}
nb_oargs = def->nb_oargs;
for (i = 0; i < nb_oargs; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
- reset_ts(ts);
+ reset_ts(ctx, ts);
/*
* Save the corresponding known-zero/sign bits mask for the
* first output argument (only one supported so far).
/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
- if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+ if (arg_is_const_val(op->args[1], i)) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
return false;
/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
- if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+ if (arg_is_const_val(op->args[1], i)) {
return fold_to_not(ctx, op, 2);
}
return false;
/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
- if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ if (arg_is_const_val(op->args[2], i)) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
return false;
/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
- if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ if (arg_is_const_val(op->args[2], i)) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
}
return false;
/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
- if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ if (arg_is_const_val(op->args[2], i)) {
return fold_to_not(ctx, op, 1);
}
return false;
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
{
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
- arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
+ bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
+ bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
+
+ if (a_const && b_const) {
uint64_t al = arg_info(op->args[2])->val;
uint64_t ah = arg_info(op->args[3])->val;
uint64_t bl = arg_info(op->args[4])->val;
tcg_opt_gen_movi(ctx, op2, rh, ah);
return true;
}
+
+ /* Fold sub2 r,x,i to add2 r,x,-i */
+ if (!add && b_const) {
+ uint64_t bl = arg_info(op->args[4])->val;
+ uint64_t bh = arg_info(op->args[5])->val;
+
+ /* Negate the two parts without assembling and disassembling. */
+ bl = -bl;
+ bh = ~bh + !bl;
+
+ op->opc = (ctx->type == TCG_TYPE_I32
+ ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
+ op->args[4] = arg_new_constant(ctx, bl);
+ op->args[5] = arg_new_constant(ctx, bh);
+ }
return false;
}
static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
- TCGCond cond = op->args[2];
- int i;
-
- if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
- op->args[2] = cond = tcg_swap_cond(cond);
- }
-
- i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+ int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
+ &op->args[1], &op->args[2]);
if (i == 0) {
tcg_op_remove(ctx->tcg, op);
return true;
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
- TCGCond cond = op->args[4];
- TCGArg label = op->args[5];
+ TCGCond cond;
+ TCGArg label;
int i, inv = 0;
- if (swap_commutative2(&op->args[0], &op->args[2])) {
- op->args[4] = cond = tcg_swap_cond(cond);
- }
-
- i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
+ i = do_constant_folding_cond2(ctx, op, &op->args[0]);
+ cond = op->args[4];
+ label = op->args[5];
if (i >= 0) {
goto do_brcond_const;
}
* Simplify LT/GE comparisons vs zero to a single compare
* vs the high word of the input.
*/
- if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
- arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
+ if (arg_is_const_val(op->args[2], 0) &&
+ arg_is_const_val(op->args[3], 0)) {
goto do_brcond_high;
}
break;
case 0:
goto do_brcond_const;
case 1:
- op->opc = INDEX_op_brcond_i32;
- op->args[1] = op->args[2];
- op->args[2] = cond;
- op->args[3] = label;
- break;
+ goto do_brcond_low;
+ }
+ break;
+
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ if (arg_is_const_val(op->args[2], 0)) {
+ goto do_brcond_high;
+ }
+ if (arg_is_const_val(op->args[3], 0)) {
+ goto do_brcond_low;
}
break;
default:
break;
+ do_brcond_low:
+ op->opc = INDEX_op_brcond_i32;
+ op->args[1] = op->args[2];
+ op->args[2] = cond;
+ op->args[3] = label;
+ return fold_brcond(ctx, op);
+
do_brcond_high:
op->opc = INDEX_op_brcond_i32;
op->args[0] = op->args[1];
op->args[1] = op->args[3];
op->args[2] = cond;
op->args[3] = label;
- break;
+ return fold_brcond(ctx, op);
do_brcond_const:
if (i == 0) {
for (i = 0; i < nb_globals; i++) {
if (test_bit(i, ctx->temps_used.l)) {
- reset_ts(&ctx->tcg->temps[i]);
+ reset_ts(ctx, &ctx->tcg->temps[i]);
}
}
}
+ /* If the function has side effects, reset mem data. */
+ if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
+ remove_mem_copy_all(ctx);
+ }
+
/* Reset temp data for outputs. */
for (i = 0; i < nb_oargs; i++) {
- reset_temp(op->args[i]);
+ reset_temp(ctx, op->args[i]);
}
/* Stop optimizing MB across calls. */
static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
+ TCGOpcode and_opc;
+
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
uint64_t t1 = arg_info(op->args[1])->val;
uint64_t t2 = arg_info(op->args[2])->val;
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
}
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ and_opc = INDEX_op_and_i32;
+ break;
+ case TCG_TYPE_I64:
+ and_opc = INDEX_op_and_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Inserting a value into zero at offset 0. */
+ if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) {
+ uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
+
+ op->opc = and_opc;
+ op->args[1] = op->args[2];
+ op->args[2] = arg_new_constant(ctx, mask);
+ ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+ return false;
+ }
+
+ /* Inserting zero into a value. */
+ if (arg_is_const_val(op->args[2], 0)) {
+ uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
+
+ op->opc = and_opc;
+ op->args[2] = arg_new_constant(ctx, mask);
+ ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+ return false;
+ }
+
ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
op->args[3], op->args[4],
arg_info(op->args[2])->z_mask);
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
- TCGCond cond = op->args[5];
int i;
- if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
- op->args[5] = cond = tcg_swap_cond(cond);
- }
/*
* Canonicalize the "false" input reg to match the destination reg so
* that the tcg backend can implement a "move if true" operation.
*/
if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
- op->args[5] = cond = tcg_invert_cond(cond);
+ op->args[5] = tcg_invert_cond(op->args[5]);
}
- i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
+ &op->args[2], &op->args[5]);
if (i >= 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
}
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
uint64_t tv = arg_info(op->args[3])->val;
uint64_t fv = arg_info(op->args[4])->val;
- TCGOpcode opc;
+ TCGOpcode opc, negopc = 0;
+ TCGCond cond = op->args[5];
switch (ctx->type) {
case TCG_TYPE_I32:
opc = INDEX_op_setcond_i32;
+ if (TCG_TARGET_HAS_negsetcond_i32) {
+ negopc = INDEX_op_negsetcond_i32;
+ }
+ tv = (int32_t)tv;
+ fv = (int32_t)fv;
break;
case TCG_TYPE_I64:
opc = INDEX_op_setcond_i64;
+ if (TCG_TARGET_HAS_negsetcond_i64) {
+ negopc = INDEX_op_negsetcond_i64;
+ }
break;
default:
g_assert_not_reached();
} else if (fv == 1 && tv == 0) {
op->opc = opc;
op->args[3] = tcg_invert_cond(cond);
+ } else if (negopc) {
+ if (tv == -1 && fv == 0) {
+ op->opc = negopc;
+ op->args[3] = cond;
+ } else if (fv == -1 && tv == 0) {
+ op->opc = negopc;
+ op->args[3] = tcg_invert_cond(cond);
+ }
}
}
return false;
return false;
}
-static bool fold_neg(OptContext *ctx, TCGOp *op)
+static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask;
-
- if (fold_const1(ctx, op)) {
- return true;
- }
-
/* Set to 1 all bits to the left of the rightmost. */
- z_mask = arg_info(op->args[1])->z_mask;
+ uint64_t z_mask = arg_info(op->args[1])->z_mask;
ctx->z_mask = -(z_mask & -z_mask);
/*
return true;
}
+static bool fold_neg(OptContext *ctx, TCGOp *op)
+{
+ return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
+}
+
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
if (fold_const2_commutative(ctx, op) ||
return false;
}
-static bool fold_setcond(OptContext *ctx, TCGOp *op)
+/*
+ * Simplify setcond (NEG false) or negsetcond (NEG true) using the
+ * known-zero mask of the first operand against a constant second
+ * operand.  Returns true if the op was fully replaced; may also
+ * rewrite the op to mov/neg/add/xor in place and return false so
+ * that the normal mask propagation still runs.
+ */
+static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
+{
+ uint64_t a_zmask, b_val;
+ TCGCond cond;
+
+ if (!arg_is_const(op->args[2])) {
+ return false;
+ }
+
+ a_zmask = arg_info(op->args[1])->z_mask;
+ b_val = arg_info(op->args[2])->val;
+ cond = op->args[3];
+
+ if (ctx->type == TCG_TYPE_I32) {
+ a_zmask = (uint32_t)a_zmask;
+ b_val = (uint32_t)b_val;
+ }
+
+ /*
+ * A with only low bits set vs B with high bits set means that A < B.
+ */
+ if (a_zmask < b_val) {
+ bool inv = false;
+
+ switch (cond) {
+ case TCG_COND_NE:
+ case TCG_COND_LEU:
+ case TCG_COND_LTU:
+ inv = true;
+ /* fall through */
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+ case TCG_COND_EQ:
+ return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
+ default:
+ break;
+ }
+ }
+
+ /*
+ * A with only lsb set is already boolean.
+ */
+ if (a_zmask <= 1) {
+ bool convert = false;
+ bool inv = false;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ inv = true;
+ /* fall through */
+ case TCG_COND_NE:
+ convert = (b_val == 0);
+ break;
+ case TCG_COND_LTU:
+ case TCG_COND_TSTEQ:
+ inv = true;
+ /* fall through */
+ case TCG_COND_GEU:
+ case TCG_COND_TSTNE:
+ convert = (b_val == 1);
+ break;
+ default:
+ break;
+ }
+ if (convert) {
+ TCGOpcode add_opc, xor_opc, neg_opc;
+
+ /* Result equals the boolean input: plain copy. */
+ if (!inv && !neg) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ add_opc = INDEX_op_add_i32;
+ neg_opc = INDEX_op_neg_i32;
+ xor_opc = INDEX_op_xor_i32;
+ break;
+ case TCG_TYPE_I64:
+ add_opc = INDEX_op_add_i64;
+ neg_opc = INDEX_op_neg_i64;
+ xor_opc = INDEX_op_xor_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Negate, subtract one, or invert bit 0, as required. */
+ if (!inv) {
+ op->opc = neg_opc;
+ } else if (neg) {
+ op->opc = add_opc;
+ op->args[2] = arg_new_constant(ctx, -1);
+ } else {
+ op->opc = xor_opc;
+ op->args[2] = arg_new_constant(ctx, 1);
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
+static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
+ TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
+ TCGOpcode uext_opc = 0, sext_opc = 0;
TCGCond cond = op->args[3];
- int i;
+ TCGArg ret, src1, src2;
+ TCGOp *op2;
+ uint64_t val;
+ int sh;
+ bool inv;
+
+ if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
+ return;
+ }
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
- op->args[3] = cond = tcg_swap_cond(cond);
+ src2 = op->args[2];
+ val = arg_info(src2)->val;
+ if (!is_power_of_2(val)) {
+ return;
+ }
+ sh = ctz64(val);
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ and_opc = INDEX_op_and_i32;
+ sub_opc = INDEX_op_sub_i32;
+ xor_opc = INDEX_op_xor_i32;
+ shr_opc = INDEX_op_shr_i32;
+ neg_opc = INDEX_op_neg_i32;
+ if (TCG_TARGET_extract_i32_valid(sh, 1)) {
+ uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
+ sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
+ }
+ break;
+ case TCG_TYPE_I64:
+ and_opc = INDEX_op_and_i64;
+ sub_opc = INDEX_op_sub_i64;
+ xor_opc = INDEX_op_xor_i64;
+ shr_opc = INDEX_op_shr_i64;
+ neg_opc = INDEX_op_neg_i64;
+ if (TCG_TARGET_extract_i64_valid(sh, 1)) {
+ uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
+ sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
- i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ ret = op->args[0];
+ src1 = op->args[1];
+ inv = cond == TCG_COND_TSTEQ;
+
+ if (sh && sext_opc && neg && !inv) {
+ op->opc = sext_opc;
+ op->args[1] = src1;
+ op->args[2] = sh;
+ op->args[3] = 1;
+ return;
+ } else if (sh && uext_opc) {
+ op->opc = uext_opc;
+ op->args[1] = src1;
+ op->args[2] = sh;
+ op->args[3] = 1;
+ } else {
+ if (sh) {
+ op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = src1;
+ op2->args[2] = arg_new_constant(ctx, sh);
+ src1 = ret;
+ }
+ op->opc = and_opc;
+ op->args[1] = src1;
+ op->args[2] = arg_new_constant(ctx, 1);
+ }
+
+ if (neg && inv) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ op2->args[2] = arg_new_constant(ctx, 1);
+ } else if (inv) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ op2->args[2] = arg_new_constant(ctx, 1);
+ } else if (neg) {
+ op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
+ op2->args[0] = ret;
+ op2->args[1] = ret;
+ }
+}
+
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
+{
+ int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
+ &op->args[2], &op->args[3]);
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
+ if (fold_setcond_zmask(ctx, op, false)) {
+ return true;
+ }
+ fold_setcond_tst_pow2(ctx, op, false);
+
ctx->z_mask = 1;
ctx->s_mask = smask_from_zmask(1);
return false;
}
-static bool fold_setcond2(OptContext *ctx, TCGOp *op)
+static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
{
- TCGCond cond = op->args[5];
- int i, inv = 0;
+ int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
+ &op->args[2], &op->args[3]);
+ if (i >= 0) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
+ }
- if (swap_commutative2(&op->args[1], &op->args[3])) {
- op->args[5] = cond = tcg_swap_cond(cond);
+ if (fold_setcond_zmask(ctx, op, true)) {
+ return true;
}
+ fold_setcond_tst_pow2(ctx, op, true);
+
+ /* Value is {0,-1} so all bits are repetitions of the sign. */
+ ctx->s_mask = -1;
+ return false;
+}
- i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond;
+ int i, inv = 0;
+
+ i = do_constant_folding_cond2(ctx, op, &op->args[1]);
+ cond = op->args[5];
if (i >= 0) {
goto do_setcond_const;
}
* Simplify LT/GE comparisons vs zero to a single compare
* vs the high word of the input.
*/
- if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
- arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
+ if (arg_is_const_val(op->args[3], 0) &&
+ arg_is_const_val(op->args[4], 0)) {
goto do_setcond_high;
}
break;
case 0:
goto do_setcond_const;
case 1:
- op->args[2] = op->args[3];
- op->args[3] = cond;
- op->opc = INDEX_op_setcond_i32;
- break;
+ goto do_setcond_low;
+ }
+ break;
+
+ case TCG_COND_TSTEQ:
+ case TCG_COND_TSTNE:
+ if (arg_is_const_val(op->args[2], 0)) {
+ goto do_setcond_high;
+ }
+ if (arg_is_const_val(op->args[4], 0)) {
+ goto do_setcond_low;
}
break;
default:
break;
+ do_setcond_low:
+ op->args[2] = op->args[3];
+ op->args[3] = cond;
+ op->opc = INDEX_op_setcond_i32;
+ return fold_setcond(ctx, op);
+
do_setcond_high:
op->args[1] = op->args[2];
op->args[2] = op->args[4];
op->args[3] = cond;
op->opc = INDEX_op_setcond_i32;
- break;
+ return fold_setcond(ctx, op);
}
ctx->z_mask = 1;
* will not reduced the number of input sign repetitions.
*/
sign = (s_mask & -s_mask) >> 1;
- if (!(z_mask & sign)) {
+ if (sign && !(z_mask & sign)) {
ctx->s_mask = s_mask;
}
break;
switch (ctx->type) {
case TCG_TYPE_I32:
neg_op = INDEX_op_neg_i32;
- have_neg = TCG_TARGET_HAS_neg_i32;
+ have_neg = true;
break;
case TCG_TYPE_I64:
neg_op = INDEX_op_neg_i64;
- have_neg = TCG_TARGET_HAS_neg_i64;
+ have_neg = true;
break;
case TCG_TYPE_V64:
case TCG_TYPE_V128:
if (have_neg) {
op->opc = neg_op;
op->args[1] = op->args[2];
- return fold_neg(ctx, op);
+ return fold_neg_no_const(ctx, op);
}
return false;
}
static bool fold_sub(OptContext *ctx, TCGOp *op)
{
- return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
+ if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+ return true;
+ }
+
+ /* Fold sub r,x,i to add r,x,-i */
+ if (arg_is_const(op->args[2])) {
+ uint64_t val = arg_info(op->args[2])->val;
+
+ op->opc = (ctx->type == TCG_TYPE_I32
+ ? INDEX_op_add_i32 : INDEX_op_add_i64);
+ op->args[2] = arg_new_constant(ctx, -val);
+ }
+ return false;
}
static bool fold_sub2(OptContext *ctx, TCGOp *op)
return false;
}
+/*
+ * Fold a load from env: if a temp is already known to hold the value
+ * at this offset, replace the load with a mov; otherwise record that
+ * the destination now mirrors this memory location.  Only env-based
+ * loads are tracked; other bases are left untouched.
+ */
+static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
+{
+ TCGTemp *dst, *src;
+ intptr_t ofs;
+ TCGType type;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ return false;
+ }
+
+ type = ctx->type;
+ ofs = op->args[2];
+ dst = arg_temp(op->args[0]);
+ src = find_mem_copy_for(ctx, type, ofs);
+ if (src && src->base_type == type) {
+ return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
+ }
+
+ reset_ts(ctx, dst);
+ record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
+ return true;
+}
+
+/*
+ * Process a store that we do not track as a memory copy (sub-word
+ * stores, or stores through an unknown base).  A store through an
+ * unknown base may alias anything, so all records are dropped;
+ * otherwise only records overlapping the stored bytes are dropped.
+ */
+static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
+{
+ intptr_t ofs = op->args[2];
+ intptr_t lm1;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ remove_mem_copy_all(ctx);
+ return false;
+ }
+
+ /* lm1 = length minus 1, per store width. */
+ switch (op->opc) {
+ CASE_OP_32_64(st8):
+ lm1 = 0;
+ break;
+ CASE_OP_32_64(st16):
+ lm1 = 1;
+ break;
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i32:
+ lm1 = 3;
+ break;
+ case INDEX_op_st_i64:
+ lm1 = 7;
+ break;
+ case INDEX_op_st_vec:
+ lm1 = tcg_type_size(ctx->type) - 1;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ remove_mem_copy_in(ctx, ofs, ofs + lm1);
+ return false;
+}
+
+/*
+ * Process a full-width store to env: invalidate overlapping records,
+ * then record that this memory location now holds the stored temp.
+ * Returns true when the store is removed as a duplicate.
+ */
+static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
+{
+ TCGTemp *src;
+ intptr_t ofs, last;
+ TCGType type;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ fold_tcg_st(ctx, op);
+ return false;
+ }
+
+ src = arg_temp(op->args[0]);
+ ofs = op->args[2];
+ type = ctx->type;
+
+ /*
+ * Eliminate duplicate stores of a constant.
+ * This happens frequently when the target ISA zero-extends.
+ */
+ if (ts_is_const(src)) {
+ TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
+ if (src == prev) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+ }
+
+ last = ofs + tcg_type_size(type) - 1;
+ remove_mem_copy_in(ctx, ofs, last);
+ record_mem_copy(ctx, type, src, ofs, last);
+ return false;
+}
+
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
if (fold_const2_commutative(ctx, op) ||
TCGOp *op, *op_next;
OptContext ctx = { .tcg = s };
+ QSIMPLEQ_INIT(&ctx.mem_free);
+
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
If this temp is a copy of other ones then the other copies are
case INDEX_op_ld32u_i64:
done = fold_tcg_ld(&ctx, op);
break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld_i64:
+ case INDEX_op_ld_vec:
+ done = fold_tcg_ld_memcopy(&ctx, op);
+ break;
+ CASE_OP_32_64(st8):
+ CASE_OP_32_64(st16):
+ case INDEX_op_st32_i64:
+ done = fold_tcg_st(&ctx, op);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st_i64:
+ case INDEX_op_st_vec:
+ done = fold_tcg_st_memcopy(&ctx, op);
+ break;
case INDEX_op_mb:
done = fold_mb(&ctx, op);
break;
CASE_OP_32_64(setcond):
done = fold_setcond(&ctx, op);
break;
+ CASE_OP_32_64(negsetcond):
+ done = fold_negsetcond(&ctx, op);
+ break;
case INDEX_op_setcond2_i32:
done = fold_setcond2(&ctx, op);
break;