#include "qemu/osdep.h"
#include "qemu/int128.h"
-#include "tcg/tcg-op.h"
+#include "qemu/interval-tree.h"
+#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#define CASE_OP_32_64(x) \
glue(glue(case INDEX_op_, x), _i64): \
glue(glue(case INDEX_op_, x), _vec)
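+/*
+ * Record of a copy of memory: bytes [itree.start, itree.last], relative
+ * to env, are known to hold the same value as temp @ts of type @type.
+ */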
+typedef struct MemCopyInfo {
+ IntervalTreeNode itree;
+ QSIMPLEQ_ENTRY(MemCopyInfo) next;
+ TCGTemp *ts;
+ TCGType type;
+} MemCopyInfo;
+
typedef struct TempOptInfo {
bool is_const;
TCGTemp *prev_copy;
TCGTemp *next_copy;
+ QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
} TempOptInfo;
typedef struct OptContext {
TCGOp *prev_mb;
TCGTempSet temps_used;
+ IntervalTreeRoot mem_copy;
+ QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
+
/* In flight values from optimization. */
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
+ uint64_t s_mask; /* mask of clrsb(value) bits */
TCGType type;
} OptContext;
+/* Calculate the smask for a specific value. */
+static uint64_t smask_from_value(uint64_t value)
+{
+ int rep = clrsb64(value);
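+ /* E.g. 0xffffffff00000000 has rep = 31; the top 31 bits are set. */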
+ return ~(~0ull >> rep);
+}
+
+/*
+ * Calculate the smask for a given set of known-zeros.
+ * If there are lots of zeros on the left, we can consider the remainder
+ * an unsigned field, and thus the corresponding signed field is one bit
+ * larger.
+ */
+static uint64_t smask_from_zmask(uint64_t zmask)
+{
+ /*
+ * Only the 0 bits are significant for zmask, thus the msb itself
+ * must be zero, else we have no sign information.
+ */
+ int rep = clz64(zmask);
+ if (rep == 0) {
+ return 0;
+ }
+ rep -= 1;
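+ /* E.g. zmask 0x0000ffff gives clz64 = 48, hence 47 repetition bits. */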
+ return ~(~0ull >> rep);
+}
+
+/*
+ * Recreate a properly left-aligned smask after manipulation.
+ * Some bit-shuffling, particularly shifts and rotates, may
+ * retain sign bits on the left, but may scatter disconnected
+ * sign bits on the right. Retain only what remains to the left.
+ */
+static uint64_t smask_from_smask(int64_t smask)
+{
+ /* Only the 1 bits are significant for smask */
+ return smask_from_zmask(~smask);
+}
+
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
return ts->state_ptr;
return ts_info(ts)->next_copy != ts;
}
-/* Reset TEMP's state, possibly removing the temp for the list of copies. */
-static void reset_ts(TCGTemp *ts)
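+/* The TCGTempKind enum is ordered such that larger values live longer. */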
+static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
- TempOptInfo *ti = ts_info(ts);
- TempOptInfo *pi = ts_info(ti->prev_copy);
- TempOptInfo *ni = ts_info(ti->next_copy);
-
- ni->prev_copy = ti->prev_copy;
- pi->next_copy = ti->next_copy;
- ti->next_copy = ts;
- ti->prev_copy = ts;
- ti->is_const = false;
- ti->z_mask = -1;
-}
-
-static void reset_temp(TCGArg arg)
-{
- reset_ts(arg_temp(arg));
+ return a->kind < b->kind ? b : a;
}
/* Initialize and activate a temporary. */
ti->next_copy = ts;
ti->prev_copy = ts;
+ QSIMPLEQ_INIT(&ti->mem_copy);
if (ts->kind == TEMP_CONST) {
ti->is_const = true;
ti->val = ts->val;
ti->z_mask = ts->val;
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
- /* High bits of a 32-bit quantity are garbage. */
- ti->z_mask |= ~0xffffffffull;
- }
+ ti->s_mask = smask_from_value(ts->val);
} else {
ti->is_const = false;
ti->z_mask = -1;
+ ti->s_mask = 0;
}
}
-static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
+static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
+{
+ IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
+ return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
+{
+ IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
+ return r ? container_of(r, MemCopyInfo, itree) : NULL;
+}
+
+static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
- TCGTemp *i, *g, *l;
+ TCGTemp *ts = mc->ts;
+ TempOptInfo *ti = ts_info(ts);
+
+ interval_tree_remove(&mc->itree, &ctx->mem_copy);
+ QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
+ QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
+}
+
+static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
+{
+ while (true) {
+ MemCopyInfo *mc = mem_copy_first(ctx, s, l);
+ if (!mc) {
+ break;
+ }
+ remove_mem_copy(ctx, mc);
+ }
+}
+
+static void remove_mem_copy_all(OptContext *ctx)
+{
+ remove_mem_copy_in(ctx, 0, -1);
+ tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
+}
+
+static TCGTemp *find_better_copy(TCGTemp *ts)
+{
+ TCGTemp *i, *ret;
/* If this is already readonly, we can't do better. */
if (temp_readonly(ts)) {
return ts;
}
- g = l = NULL;
+ ret = ts;
for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
- if (temp_readonly(i)) {
- return i;
- } else if (i->kind > ts->kind) {
- if (i->kind == TEMP_GLOBAL) {
- g = i;
- } else if (i->kind == TEMP_LOCAL) {
- l = i;
+ ret = cmp_better_copy(ret, i);
+ }
+ return ret;
+}
+
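+/* Reassign to @dst_ts all memory copies currently tracked for @src_ts. */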
+static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
+{
+ TempOptInfo *si = ts_info(src_ts);
+ TempOptInfo *di = ts_info(dst_ts);
+ MemCopyInfo *mc;
+
+ QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
+ tcg_debug_assert(mc->ts == src_ts);
+ mc->ts = dst_ts;
+ }
+ QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
+}
+
+/* Reset TEMP's state, possibly removing the temp from the list of copies. */
+static void reset_ts(OptContext *ctx, TCGTemp *ts)
+{
+ TempOptInfo *ti = ts_info(ts);
+ TCGTemp *pts = ti->prev_copy;
+ TCGTemp *nts = ti->next_copy;
+ TempOptInfo *pi = ts_info(pts);
+ TempOptInfo *ni = ts_info(nts);
+
+ ni->prev_copy = ti->prev_copy;
+ pi->next_copy = ti->next_copy;
+ ti->next_copy = ts;
+ ti->prev_copy = ts;
+ ti->is_const = false;
+ ti->z_mask = -1;
+ ti->s_mask = 0;
+
+ if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
+ if (ts == nts) {
+ /* Last temp copy being removed, the mem copies die. */
+ MemCopyInfo *mc;
+ QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
+ interval_tree_remove(&mc->itree, &ctx->mem_copy);
}
+ QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
+ } else {
+ move_mem_copies(find_better_copy(nts), ts);
}
}
+}
+
+static void reset_temp(OptContext *ctx, TCGArg arg)
+{
+ reset_ts(ctx, arg_temp(arg));
+}
+
+static void record_mem_copy(OptContext *ctx, TCGType type,
+ TCGTemp *ts, intptr_t start, intptr_t last)
+{
+ MemCopyInfo *mc;
+ TempOptInfo *ti;
+
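+ /* Reuse a node from the free list before allocating a new one. */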
+ mc = QSIMPLEQ_FIRST(&ctx->mem_free);
+ if (mc) {
+ QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
+ } else {
+ mc = tcg_malloc(sizeof(*mc));
+ }
+
+ memset(mc, 0, sizeof(*mc));
+ mc->itree.start = start;
+ mc->itree.last = last;
+ mc->type = type;
+ interval_tree_insert(&mc->itree, &ctx->mem_copy);
- /* If we didn't find a better representation, return the same temp. */
- return g ? g : l ? l : ts;
+ ts = find_better_copy(ts);
+ ti = ts_info(ts);
+ mc->ts = ts;
+ QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
}
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}
+static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
+{
+ MemCopyInfo *mc;
+
+ for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
+ if (mc->itree.start == s && mc->type == type) {
+ return find_better_copy(mc->ts);
+ }
+ }
+ return NULL;
+}
+
+static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
+{
+ TCGType type = ctx->type;
+ TCGTemp *ts;
+
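+ /* The optimizer represents 32-bit constants sign-extended. */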
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ ts = tcg_constant_internal(type, val);
+ init_ts_info(ctx, ts);
+
+ return temp_arg(ts);
+}
+
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
TCGTemp *dst_ts = arg_temp(dst);
TCGTemp *src_ts = arg_temp(src);
TempOptInfo *di;
TempOptInfo *si;
- uint64_t z_mask;
TCGOpcode new_op;
if (ts_are_copies(dst_ts, src_ts)) {
return true;
}
- reset_ts(dst_ts);
+ reset_ts(ctx, dst_ts);
di = ts_info(dst_ts);
si = ts_info(src_ts);
op->args[0] = dst;
op->args[1] = src;
- z_mask = si->z_mask;
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
- /* High bits of the destination are now garbage. */
- z_mask |= ~0xffffffffull;
- }
- di->z_mask = z_mask;
+ di->z_mask = si->z_mask;
+ di->s_mask = si->s_mask;
if (src_ts->type == dst_ts->type) {
TempOptInfo *ni = ts_info(si->next_copy);
si->next_copy = dst_ts;
di->is_const = si->is_const;
di->val = si->val;
+
+ if (!QSIMPLEQ_EMPTY(&si->mem_copy)
+ && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
+ move_mem_copies(dst_ts, src_ts);
+ }
}
return true;
}
TCGArg dst, uint64_t val)
{
/* Convert movi to mov with constant temp. */
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
-
- init_ts_info(ctx, tv);
- return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
+ return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
CASE_OP_32_64(mul):
return x * y;
- CASE_OP_32_64(and):
+ CASE_OP_32_64_VEC(and):
return x & y;
- CASE_OP_32_64(or):
+ CASE_OP_32_64_VEC(or):
return x | y;
- CASE_OP_32_64(xor):
+ CASE_OP_32_64_VEC(xor):
return x ^ y;
case INDEX_op_shl_i32:
case INDEX_op_rotl_i64:
return rol64(x, y & 63);
- CASE_OP_32_64(not):
+ CASE_OP_32_64_VEC(not):
return ~x;
CASE_OP_32_64(neg):
return -x;
- CASE_OP_32_64(andc):
+ CASE_OP_32_64_VEC(andc):
return x & ~y;
- CASE_OP_32_64(orc):
+ CASE_OP_32_64_VEC(orc):
return x | ~y;
- CASE_OP_32_64(eqv):
+ CASE_OP_32_64_VEC(eqv):
return ~(x ^ y);
- CASE_OP_32_64(nand):
+ CASE_OP_32_64_VEC(nand):
return ~(x & y);
- CASE_OP_32_64(nor):
+ CASE_OP_32_64_VEC(nor):
return ~(x | y);
case INDEX_op_clz_i32:
return (uint64_t)x % ((uint64_t)y ? : 1);
default:
- fprintf(stderr,
- "Unrecognized operation %d in do_constant_folding.\n", op);
- tcg_abort();
+ g_assert_not_reached();
}
}
case TCG_COND_GTU:
return x > y;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
case TCG_COND_GTU:
return x > y;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
case TCG_COND_EQ:
return 1;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
static int do_constant_folding_cond(TCGType type, TCGArg x,
TCGArg y, TCGCond c)
{
- uint64_t xv = arg_info(x)->val;
- uint64_t yv = arg_info(y)->val;
-
if (arg_is_const(x) && arg_is_const(y)) {
+ uint64_t xv = arg_info(x)->val;
+ uint64_t yv = arg_info(y)->val;
+
switch (type) {
case TCG_TYPE_I32:
return do_constant_folding_cond_32(xv, yv, c);
}
} else if (args_are_copies(x, y)) {
return do_constant_folding_cond_eq(c);
- } else if (arg_is_const(y) && yv == 0) {
+ } else if (arg_is_const(y) && arg_info(y)->val == 0) {
switch (c) {
case TCG_COND_LTU:
return 0;
return -1;
}
+/**
+ * swap_commutative:
+ * @dest: TCGArg of the destination argument, or NO_DEST.
+ * @p1: first paired argument
+ * @p2: second paired argument
+ *
+ * If *@p1 is a constant and *@p2 is not, swap.
+ * If *@p2 matches @dest, swap.
+ * Return true if a swap was performed.
+ */
+
+#define NO_DEST temp_arg(NULL)
+
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
TCGArg a1 = *p1, a2 = *p2;
{
for (int i = 0; i < nb_args; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
- if (ts) {
- init_ts_info(ctx, ts);
- }
+ init_ts_info(ctx, ts);
}
}
static void copy_propagate(OptContext *ctx, TCGOp *op,
int nb_oargs, int nb_iargs)
{
- TCGContext *s = ctx->tcg;
-
for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
TCGTemp *ts = arg_temp(op->args[i]);
- if (ts && ts_is_copy(ts)) {
- op->args[i] = temp_arg(find_better_copy(s, ts));
+ if (ts_is_copy(ts)) {
+ op->args[i] = temp_arg(find_better_copy(ts));
}
}
}
int i, nb_oargs;
/*
- * For an opcode that ends a BB, reset all temp data.
- * We do no cross-BB optimization.
+ * We only optimize extended basic blocks. If the opcode ends a BB
+ * and is not a conditional branch, reset all temp data.
*/
if (def->flags & TCG_OPF_BB_END) {
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
ctx->prev_mb = NULL;
+ if (!(def->flags & TCG_OPF_COND_BRANCH)) {
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ remove_mem_copy_all(ctx);
+ }
return;
}
nb_oargs = def->nb_oargs;
for (i = 0; i < nb_oargs; i++) {
- reset_temp(op->args[i]);
+ TCGTemp *ts = arg_temp(op->args[i]);
+ reset_ts(ctx, ts);
/*
- * Save the corresponding known-zero bits mask for the
+ * Save the corresponding known-zero/sign bits mask for the
* first output argument (only one supported so far).
*/
if (i == 0) {
- arg_info(op->args[i])->z_mask = ctx->z_mask;
+ ts_info(ts)->z_mask = ctx->z_mask;
+ ts_info(ts)->s_mask = ctx->s_mask;
}
}
}
return false;
}
+static bool fold_commutative(OptContext *ctx, TCGOp *op)
+{
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+ return false;
+}
+
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
+{
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+ return fold_const2(ctx, op);
+}
+
static bool fold_masks(OptContext *ctx, TCGOp *op)
{
uint64_t a_mask = ctx->a_mask;
uint64_t z_mask = ctx->z_mask;
+ uint64_t s_mask = ctx->s_mask;
/*
- * 32-bit ops generate 32-bit results. For the result is zero test
- * below, we can ignore high bits, but for further optimizations we
- * need to record that the high bits contain garbage.
+ * 32-bit ops generate 32-bit results, which for the purpose of
+ * simplifying tcg are sign-extended. Certainly that's how we
+ * represent our constants elsewhere. Note that the bits will
+ * be reset properly for a 64-bit value when encountering the
+ * type changing opcodes.
*/
if (ctx->type == TCG_TYPE_I32) {
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
- a_mask &= MAKE_64BIT_MASK(0, 32);
- z_mask &= MAKE_64BIT_MASK(0, 32);
+ a_mask = (int32_t)a_mask;
+ z_mask = (int32_t)z_mask;
+ s_mask |= MAKE_64BIT_MASK(32, 32);
+ ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
}
if (z_mask == 0) {
static bool fold_add(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_x(ctx, op, 0)) {
+ return true;
+ }
+ return false;
+}
+
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_add_vec(OptContext *ctx, TCGOp *op)
+{
+ if (fold_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, 0)) {
return true;
}
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
{
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
- arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
+ bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
+ bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
+
+ if (a_const && b_const) {
uint64_t al = arg_info(op->args[2])->val;
uint64_t ah = arg_info(op->args[3])->val;
uint64_t bl = arg_info(op->args[4])->val;
rh = op->args[1];
/* The proper opcode is supplied by tcg_opt_gen_mov. */
- op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
tcg_opt_gen_movi(ctx, op, rl, al);
tcg_opt_gen_movi(ctx, op2, rh, ah);
return true;
}
+
+ /* Fold sub2 r,x,i to add2 r,x,-i */
+ if (!add && b_const) {
+ uint64_t bl = arg_info(op->args[4])->val;
+ uint64_t bh = arg_info(op->args[5])->val;
+
+ /* Negate the two parts without assembling and disassembling. */
+ bl = -bl;
+ bh = ~bh + !bl;
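+ /* !bl supplies the carry: set iff the low part negated to zero. */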
+
+ op->opc = (ctx->type == TCG_TYPE_I32
+ ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
+ op->args[4] = arg_new_constant(ctx, bl);
+ op->args[5] = arg_new_constant(ctx, bh);
+ }
return false;
}
static bool fold_add2(OptContext *ctx, TCGOp *op)
{
+ /* Note that the high and low parts may be independently swapped. */
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
+
return fold_addsub2(ctx, op, true);
}
{
uint64_t z1, z2;
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, -1) ||
fold_xx_to_x(ctx, op)) {
z2 = arg_info(op->args[2])->z_mask;
ctx->z_mask = z1 & z2;
+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+
/*
* Known-zeros does not imply known-ones. Therefore unless
* arg2 is constant, we can't infer affected bits from it.
}
ctx->z_mask = z1;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}
static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[2];
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+ int i;
+
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
+ op->args[2] = cond = tcg_swap_cond(cond);
+ }
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
if (i == 0) {
tcg_op_remove(ctx->tcg, op);
return true;
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[4];
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
TCGArg label = op->args[5];
- int inv = 0;
+ int i, inv = 0;
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
+ op->args[4] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
if (i >= 0) {
goto do_brcond_const;
}
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, sign;
+ uint64_t z_mask, s_mask, sign;
if (arg_is_const(op->args[1])) {
uint64_t t = arg_info(op->args[1])->val;
}
z_mask = arg_info(op->args[1])->z_mask;
+
switch (op->opc) {
case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
default:
g_assert_not_reached();
}
+ s_mask = smask_from_zmask(z_mask);
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
case TCG_BSWAP_OZ:
/* If the sign bit may be 1, force all the bits above to 1. */
if (z_mask & sign) {
z_mask |= sign;
+ s_mask = sign << 1;
}
break;
default:
/* The high bits are undefined: force all bits above the sign to 1. */
z_mask |= sign << 1;
+ s_mask = 0;
break;
}
ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
return fold_masks(ctx, op);
}
for (i = 0; i < nb_globals; i++) {
if (test_bit(i, ctx->temps_used.l)) {
- reset_ts(&ctx->tcg->temps[i]);
+ reset_ts(ctx, &ctx->tcg->temps[i]);
}
}
}
+ /* If the function has side effects, reset mem data. */
+ if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
+ remove_mem_copy_all(ctx);
+ }
+
/* Reset temp data for outputs. */
for (i = 0; i < nb_oargs; i++) {
- reset_temp(op->args[i]);
+ reset_temp(ctx, op->args[i]);
}
/* Stop optimizing MB across calls. */
g_assert_not_reached();
}
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
-
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
return false;
}
default:
g_assert_not_reached();
}
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
return false;
}
static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
+ TCGOpcode and_opc;
+
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
uint64_t t1 = arg_info(op->args[1])->val;
uint64_t t2 = arg_info(op->args[2])->val;
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
}
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ and_opc = INDEX_op_and_i32;
+ break;
+ case TCG_TYPE_I64:
+ and_opc = INDEX_op_and_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* Inserting a value into zero at offset 0. */
+ if (arg_is_const(op->args[1])
+ && arg_info(op->args[1])->val == 0
+ && op->args[3] == 0) {
+ uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
+
+ op->opc = and_opc;
+ op->args[1] = op->args[2];
+ op->args[2] = arg_new_constant(ctx, mask);
+ ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+ return false;
+ }
+
+ /* Inserting zero into a value. */
+ if (arg_is_const(op->args[2])
+ && arg_info(op->args[2])->val == 0) {
+ uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
+
+ op->opc = and_opc;
+ op->args[2] = arg_new_constant(ctx, mask);
+ ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
+ return false;
+ }
+
ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
op->args[3], op->args[4],
arg_info(op->args[2])->z_mask);
static bool fold_divide(OptContext *ctx, TCGOp *op)
{
- return fold_const2(ctx, op);
+ if (fold_const2(ctx, op) ||
+ fold_xi_to_x(ctx, op, 1)) {
+ return true;
+ }
+ return false;
}
static bool fold_dup(OptContext *ctx, TCGOp *op)
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, -1) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}
static bool fold_extract(OptContext *ctx, TCGOp *op)
{
uint64_t z_mask_old, z_mask;
+ int pos = op->args[2];
+ int len = op->args[3];
if (arg_is_const(op->args[1])) {
uint64_t t;
t = arg_info(op->args[1])->val;
- t = extract64(t, op->args[2], op->args[3]);
+ t = extract64(t, pos, len);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
z_mask_old = arg_info(op->args[1])->z_mask;
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
- if (op->args[2] == 0) {
+ z_mask = extract64(z_mask_old, pos, len);
+ if (pos == 0) {
ctx->a_mask = z_mask_old ^ z_mask;
}
ctx->z_mask = z_mask;
+ ctx->s_mask = smask_from_zmask(z_mask);
return fold_masks(ctx, op);
}
v2 <<= 64 - shr;
} else {
v1 = (uint32_t)v1 >> shr;
- v2 = (int32_t)v2 << (32 - shr);
+ v2 = (uint64_t)((int32_t)v2 << (32 - shr));
}
return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
}
static bool fold_exts(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask_old, z_mask, sign;
+ uint64_t s_mask_old, s_mask, z_mask, sign;
bool type_change = false;
if (fold_const1(ctx, op)) {
return true;
}
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
+ z_mask = arg_info(op->args[1])->z_mask;
+ s_mask = arg_info(op->args[1])->s_mask;
+ s_mask_old = s_mask;
switch (op->opc) {
CASE_OP_32_64(ext8s):
if (z_mask & sign) {
z_mask |= sign;
- } else if (!type_change) {
- ctx->a_mask = z_mask_old ^ z_mask;
}
+ s_mask |= sign << 1;
+
ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
+ if (!type_change) {
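+ /* Bits that can change are exactly the new sign repetitions. */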
+ ctx->a_mask = s_mask & ~s_mask_old;
+ }
return fold_masks(ctx, op);
}
}
ctx->z_mask = z_mask;
+ ctx->s_mask = smask_from_zmask(z_mask);
if (!type_change) {
ctx->a_mask = z_mask_old ^ z_mask;
}
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[5];
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ int i;
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[5] = cond = tcg_swap_cond(cond);
+ }
+ /*
+ * Canonicalize the "false" input reg to match the destination reg so
+ * that the tcg backend can implement a "move if true" operation.
+ */
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+ op->args[5] = cond = tcg_invert_cond(cond);
+ }
+
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
if (i >= 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
}
ctx->z_mask = arg_info(op->args[3])->z_mask
| arg_info(op->args[4])->z_mask;
+ ctx->s_mask = arg_info(op->args[3])->s_mask
+ & arg_info(op->args[4])->s_mask;
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
uint64_t tv = arg_info(op->args[3])->val;
uint64_t fv = arg_info(op->args[4])->val;
- TCGOpcode opc;
+ TCGOpcode opc, negopc = 0;
switch (ctx->type) {
case TCG_TYPE_I32:
opc = INDEX_op_setcond_i32;
+ if (TCG_TARGET_HAS_negsetcond_i32) {
+ negopc = INDEX_op_negsetcond_i32;
+ }
+ tv = (int32_t)tv;
+ fv = (int32_t)fv;
break;
case TCG_TYPE_I64:
opc = INDEX_op_setcond_i64;
+ if (TCG_TARGET_HAS_negsetcond_i64) {
+ negopc = INDEX_op_negsetcond_i64;
+ }
break;
default:
g_assert_not_reached();
} else if (fv == 1 && tv == 0) {
op->opc = opc;
op->args[3] = tcg_invert_cond(cond);
+ } else if (negopc) {
+ if (tv == -1 && fv == 0) {
+ op->opc = negopc;
+ op->args[3] = cond;
+ } else if (fv == -1 && tv == 0) {
+ op->opc = negopc;
+ op->args[3] = tcg_invert_cond(cond);
+ }
}
}
return false;
static bool fold_mul(OptContext *ctx, TCGOp *op)
{
if (fold_const2(ctx, op) ||
- fold_xi_to_i(ctx, op, 0)) {
+ fold_xi_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 1)) {
return true;
}
return false;
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_i(ctx, op, 0)) {
return true;
}
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
{
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
+
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
uint64_t a = arg_info(op->args[2])->val;
uint64_t b = arg_info(op->args[3])->val;
rh = op->args[1];
/* The proper opcode is supplied by tcg_opt_gen_mov. */
- op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
tcg_opt_gen_movi(ctx, op, rl, l);
tcg_opt_gen_movi(ctx, op2, rh, h);
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, -1)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}
return true;
}
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
+
/* Because of fold_to_not, we want to always return true, via finish. */
finish_folding(ctx, op);
return true;
static bool fold_or(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xi_to_x(ctx, op, 0) ||
fold_xx_to_x(ctx, op)) {
return true;
ctx->z_mask = arg_info(op->args[1])->z_mask
| arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}
static bool fold_orc(OptContext *ctx, TCGOp *op)
{
if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, -1) ||
fold_xi_to_x(ctx, op, -1) ||
fold_ix_to_not(ctx, op, 0)) {
return true;
}
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return false;
}
MemOp mop = get_memop(oi);
int width = 8 * memop_size(mop);
- if (!(mop & MO_SIGN) && width < 64) {
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
+ if (width < 64) {
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+ if (!(mop & MO_SIGN)) {
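+ /* A zero-extending load provides one fewer sign repetition. */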
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
+ ctx->s_mask <<= 1;
+ }
}
/* Opcodes that touch guest memory stop the mb optimization. */
static bool fold_remainder(OptContext *ctx, TCGOp *op)
{
- return fold_const2(ctx, op);
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0)) {
+ return true;
+ }
+ return false;
}
static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[3];
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ int i;
+
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
+ op->args[3] = cond = tcg_swap_cond(cond);
+ }
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
return false;
}
+static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[3];
+ int i;
+
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
+ op->args[3] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ if (i >= 0) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
+ }
+
+ /* Value is {0,-1} so all bits are repetitions of the sign. */
+ ctx->s_mask = -1;
+ return false;
+}
+
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[5];
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
- int inv = 0;
+ int i, inv = 0;
+
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
+ op->args[5] = cond = tcg_swap_cond(cond);
+ }
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
if (i >= 0) {
goto do_setcond_const;
}
}
ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
return false;
do_setcond_const:
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
- int64_t z_mask_old, z_mask;
+ uint64_t z_mask, s_mask, s_mask_old;
+ int pos = op->args[2];
+ int len = op->args[3];
if (arg_is_const(op->args[1])) {
uint64_t t;
t = arg_info(op->args[1])->val;
- t = sextract64(t, op->args[2], op->args[3]);
+ t = sextract64(t, pos, len);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
- z_mask_old = arg_info(op->args[1])->z_mask;
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
- if (op->args[2] == 0 && z_mask >= 0) {
- ctx->a_mask = z_mask_old ^ z_mask;
- }
+ z_mask = arg_info(op->args[1])->z_mask;
+ z_mask = sextract64(z_mask, pos, len);
ctx->z_mask = z_mask;
+ s_mask_old = arg_info(op->args[1])->s_mask;
+ s_mask = sextract64(s_mask_old, pos, len);
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
+ ctx->s_mask = s_mask;
+
+ if (pos == 0) {
+ ctx->a_mask = s_mask & ~s_mask_old;
+ }
+
return fold_masks(ctx, op);
}
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
+ uint64_t s_mask, z_mask, sign;
+
if (fold_const2(ctx, op) ||
fold_ix_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, 0)) {
return true;
}
+ s_mask = arg_info(op->args[1])->s_mask;
+ z_mask = arg_info(op->args[1])->z_mask;
+
if (arg_is_const(op->args[2])) {
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
- arg_info(op->args[1])->z_mask,
- arg_info(op->args[2])->val);
+ int sh = arg_info(op->args[2])->val;
+
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
+ ctx->s_mask = smask_from_smask(s_mask);
+
return fold_masks(ctx, op);
}
+
+ switch (op->opc) {
+ CASE_OP_32_64(sar):
+ /*
+ * Arithmetic right shift will not reduce the number of
+ * input sign repetitions.
+ */
+ ctx->s_mask = s_mask;
+ break;
+ CASE_OP_32_64(shr):
+ /*
+ * If the sign bit is known zero, then logical right shift
+ * will not reduce the number of input sign repetitions.
+ */
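+ /* One below the lowest repetition bit is the effective sign bit. */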
+ sign = (s_mask & -s_mask) >> 1;
+ if (!(z_mask & sign)) {
+ ctx->s_mask = s_mask;
+ }
+ break;
+ default:
+ break;
+ }
+
return false;
}
switch (ctx->type) {
case TCG_TYPE_I32:
neg_op = INDEX_op_neg_i32;
- have_neg = TCG_TARGET_HAS_neg_i32;
+ have_neg = true;
break;
case TCG_TYPE_I64:
neg_op = INDEX_op_neg_i64;
- have_neg = TCG_TARGET_HAS_neg_i64;
+ have_neg = true;
break;
case TCG_TYPE_V64:
case TCG_TYPE_V128:
return false;
}
-static bool fold_sub(OptContext *ctx, TCGOp *op)
+/* We cannot as yet do_constant_folding with vectors. */
+static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
- fold_xx_to_i(ctx, op, 0) ||
+ if (fold_xx_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, 0) ||
fold_sub_to_neg(ctx, op)) {
return true;
return false;
}
+static bool fold_sub(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
+ return true;
+ }
+
+ /* Fold sub r,x,i to add r,x,-i */
+ if (arg_is_const(op->args[2])) {
+ uint64_t val = arg_info(op->args[2])->val;
+
+ op->opc = (ctx->type == TCG_TYPE_I32
+ ? INDEX_op_add_i32 : INDEX_op_add_i64);
+ op->args[2] = arg_new_constant(ctx, -val);
+ }
+ return false;
+}
+
static bool fold_sub2(OptContext *ctx, TCGOp *op)
{
return fold_addsub2(ctx, op, false);
{
/* We can't do any folding with a load, but we can record bits. */
switch (op->opc) {
+ CASE_OP_32_64(ld8s):
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+ break;
CASE_OP_32_64(ld8u):
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
+ break;
+ CASE_OP_32_64(ld16s):
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
break;
CASE_OP_32_64(ld16u):
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
+ break;
+ case INDEX_op_ld32s_i64:
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
break;
case INDEX_op_ld32u_i64:
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return false;
+}
+
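+/*
+ * If the slot of env being loaded has a recorded copy, forward the
+ * recorded temp with a mov; otherwise record this load as a new copy.
+ */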
+static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
+{
+ TCGTemp *dst, *src;
+ intptr_t ofs;
+ TCGType type;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ return false;
+ }
+
+ type = ctx->type;
+ ofs = op->args[2];
+ dst = arg_temp(op->args[0]);
+ src = find_mem_copy_for(ctx, type, ofs);
+ if (src && src->base_type == type) {
+ return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
+ }
+
+ reset_ts(ctx, dst);
+ record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
+ return true;
+}
+
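+/* A store to env invalidates recorded copies overlapping the range. */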
+static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
+{
+ intptr_t ofs = op->args[2];
+ intptr_t lm1;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ remove_mem_copy_all(ctx);
+ return false;
+ }
+
+ switch (op->opc) {
+ CASE_OP_32_64(st8):
+ lm1 = 0;
+ break;
+ CASE_OP_32_64(st16):
+ lm1 = 1;
+ break;
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i32:
+ lm1 = 3;
+ break;
+ case INDEX_op_st_i64:
+ lm1 = 7;
+ break;
+ case INDEX_op_st_vec:
+ lm1 = tcg_type_size(ctx->type) - 1;
break;
default:
g_assert_not_reached();
}
+ remove_mem_copy_in(ctx, ofs, ofs + lm1);
+ return false;
+}
+
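+/* Record a store to env as a new memory copy, replacing any overlap. */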
+static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
+{
+ TCGTemp *src;
+ intptr_t ofs, last;
+ TCGType type;
+
+ if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
+ fold_tcg_st(ctx, op);
+ return false;
+ }
+
+ src = arg_temp(op->args[0]);
+ ofs = op->args[2];
+ type = ctx->type;
+
+ /*
+ * Eliminate duplicate stores of a constant.
+ * This happens frequently when the target ISA zero-extends.
+ */
+ if (ts_is_const(src)) {
+ TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
+ if (src == prev) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+ }
+
+ last = ofs + tcg_type_size(type) - 1;
+ remove_mem_copy_in(ctx, ofs, last);
+ record_mem_copy(ctx, type, src, ofs, last);
return false;
}
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
- if (fold_const2(ctx, op) ||
+ if (fold_const2_commutative(ctx, op) ||
fold_xx_to_i(ctx, op, 0) ||
fold_xi_to_x(ctx, op, 0) ||
fold_xi_to_not(ctx, op, -1)) {
ctx->z_mask = arg_info(op->args[1])->z_mask
| arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
return fold_masks(ctx, op);
}
TCGOp *op, *op_next;
OptContext ctx = { .tcg = s };
+ QSIMPLEQ_INIT(&ctx.mem_free);
+
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
If this temp is a copy of other ones then the other copies are
ctx.type = TCG_TYPE_I32;
}
- /* For commutative operations make constant second argument */
- switch (opc) {
- CASE_OP_32_64_VEC(add):
- CASE_OP_32_64_VEC(mul):
- CASE_OP_32_64_VEC(and):
- CASE_OP_32_64_VEC(or):
- CASE_OP_32_64_VEC(xor):
- CASE_OP_32_64(eqv):
- CASE_OP_32_64(nand):
- CASE_OP_32_64(nor):
- CASE_OP_32_64(muluh):
- CASE_OP_32_64(mulsh):
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
- break;
- CASE_OP_32_64(brcond):
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
- op->args[2] = tcg_swap_cond(op->args[2]);
- }
- break;
- CASE_OP_32_64(setcond):
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
- op->args[3] = tcg_swap_cond(op->args[3]);
- }
- break;
- CASE_OP_32_64(movcond):
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
- op->args[5] = tcg_swap_cond(op->args[5]);
- }
- /* For movcond, we canonicalize the "false" input reg to match
- the destination reg so that the tcg backend can implement
- a "move if true" operation. */
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
- op->args[5] = tcg_invert_cond(op->args[5]);
- }
- break;
- CASE_OP_32_64(add2):
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
- break;
- CASE_OP_32_64(mulu2):
- CASE_OP_32_64(muls2):
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
- break;
- case INDEX_op_brcond2_i32:
- if (swap_commutative2(&op->args[0], &op->args[2])) {
- op->args[4] = tcg_swap_cond(op->args[4]);
- }
- break;
- case INDEX_op_setcond2_i32:
- if (swap_commutative2(&op->args[1], &op->args[3])) {
- op->args[5] = tcg_swap_cond(op->args[5]);
- }
- break;
- default:
- break;
- }
-
- /* Assume all bits affected, and no bits known zero. */
+ /* Assume all bits affected, no bits known zero, no sign reps. */
ctx.a_mask = -1;
ctx.z_mask = -1;
+ ctx.s_mask = 0;
/*
* Process each opcode.
* Sorted alphabetically by opcode as much as possible.
*/
switch (opc) {
- CASE_OP_32_64_VEC(add):
+ CASE_OP_32_64(add):
done = fold_add(&ctx, op);
break;
+ case INDEX_op_add_vec:
+ done = fold_add_vec(&ctx, op);
+ break;
CASE_OP_32_64(add2):
done = fold_add2(&ctx, op);
break;
case INDEX_op_dup2_vec:
done = fold_dup2(&ctx, op);
break;
- CASE_OP_32_64(eqv):
+ CASE_OP_32_64_VEC(eqv):
done = fold_eqv(&ctx, op);
break;
CASE_OP_32_64(extract):
case INDEX_op_extrh_i64_i32:
done = fold_extu(&ctx, op);
break;
+ CASE_OP_32_64(ld8s):
CASE_OP_32_64(ld8u):
+ CASE_OP_32_64(ld16s):
CASE_OP_32_64(ld16u):
+ case INDEX_op_ld32s_i64:
case INDEX_op_ld32u_i64:
done = fold_tcg_ld(&ctx, op);
break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld_i64:
+ case INDEX_op_ld_vec:
+ done = fold_tcg_ld_memcopy(&ctx, op);
+ break;
+ CASE_OP_32_64(st8):
+ CASE_OP_32_64(st16):
+ case INDEX_op_st32_i64:
+ done = fold_tcg_st(&ctx, op);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st_i64:
+ case INDEX_op_st_vec:
+ done = fold_tcg_st_memcopy(&ctx, op);
+ break;
case INDEX_op_mb:
done = fold_mb(&ctx, op);
break;
CASE_OP_32_64(mulu2):
done = fold_multiply2(&ctx, op);
break;
- CASE_OP_32_64(nand):
+ CASE_OP_32_64_VEC(nand):
done = fold_nand(&ctx, op);
break;
CASE_OP_32_64(neg):
done = fold_neg(&ctx, op);
break;
- CASE_OP_32_64(nor):
+ CASE_OP_32_64_VEC(nor):
done = fold_nor(&ctx, op);
break;
CASE_OP_32_64_VEC(not):
CASE_OP_32_64_VEC(orc):
done = fold_orc(&ctx, op);
break;
- case INDEX_op_qemu_ld_i32:
- case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_ld_a32_i32:
+ case INDEX_op_qemu_ld_a64_i32:
+ case INDEX_op_qemu_ld_a32_i64:
+ case INDEX_op_qemu_ld_a64_i64:
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
done = fold_qemu_ld(&ctx, op);
break;
- case INDEX_op_qemu_st_i32:
- case INDEX_op_qemu_st8_i32:
- case INDEX_op_qemu_st_i64:
+ case INDEX_op_qemu_st8_a32_i32:
+ case INDEX_op_qemu_st8_a64_i32:
+ case INDEX_op_qemu_st_a32_i32:
+ case INDEX_op_qemu_st_a64_i32:
+ case INDEX_op_qemu_st_a32_i64:
+ case INDEX_op_qemu_st_a64_i64:
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
done = fold_qemu_st(&ctx, op);
break;
CASE_OP_32_64(rem):
CASE_OP_32_64(setcond):
done = fold_setcond(&ctx, op);
break;
+ CASE_OP_32_64(negsetcond):
+ done = fold_negsetcond(&ctx, op);
+ break;
case INDEX_op_setcond2_i32:
done = fold_setcond2(&ctx, op);
break;
CASE_OP_32_64(sextract):
done = fold_sextract(&ctx, op);
break;
- CASE_OP_32_64_VEC(sub):
+ CASE_OP_32_64(sub):
done = fold_sub(&ctx, op);
break;
+ case INDEX_op_sub_vec:
+ done = fold_sub_vec(&ctx, op);
+ break;
CASE_OP_32_64(sub2):
done = fold_sub2(&ctx, op);
break;