]> git.proxmox.com Git - mirror_qemu.git/blobdiff - tcg/optimize.c
m48t59: register a QOM type for each nvram type we support
[mirror_qemu.git] / tcg / optimize.c
index 0302f4f99a8cf7897770c0dd26c6f6d8065e6915..067917c396f4457e14d5db714842f79b3587fbfa 100644 (file)
@@ -67,6 +67,37 @@ static void reset_temp(TCGArg temp)
     temps[temp].mask = -1;
 }
 
+static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
+                                TCGOpcode opc, int nargs)
+{
+    int oi = s->gen_next_op_idx;
+    int pi = s->gen_next_parm_idx;
+    int prev = old_op->prev;
+    int next = old_op - s->gen_op_buf;
+    TCGOp *new_op;
+
+    tcg_debug_assert(oi < OPC_BUF_SIZE);
+    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+    s->gen_next_op_idx = oi + 1;
+    s->gen_next_parm_idx = pi + nargs;
+
+    new_op = &s->gen_op_buf[oi];
+    *new_op = (TCGOp){
+        .opc = opc,
+        .args = pi,
+        .prev = prev,
+        .next = next
+    };
+    if (prev >= 0) {
+        s->gen_op_buf[prev].next = oi;
+    } else {
+        s->gen_first_op_idx = oi;
+    }
+    old_op->prev = oi;
+
+    return new_op;
+}
+
 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
 static void reset_all_temps(int nb_temps)
 {
@@ -83,6 +114,20 @@ static int op_bits(TCGOpcode op)
     return def->flags & TCG_OPF_64BIT ? 64 : 32;
 }
 
+static TCGOpcode op_to_mov(TCGOpcode op)
+{
+    switch (op_bits(op)) {
+    case 32:
+        return INDEX_op_mov_i32;
+    case 64:
+        return INDEX_op_mov_i64;
+    default:
+        fprintf(stderr, "op_to_mov: unexpected return value of "
+                "function op_bits.\n");
+        tcg_abort();
+    }
+}
+
 static TCGOpcode op_to_movi(TCGOpcode op)
 {
     switch (op_bits(op)) {
@@ -148,11 +193,22 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
     return false;
 }
 
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
-                            TCGArg dst, TCGArg src)
+static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
+                            TCGOpcode old_op, TCGArg dst, TCGArg src)
 {
+    TCGOpcode new_op = op_to_mov(old_op);
+    tcg_target_ulong mask;
+
+    op->opc = new_op;
+
     reset_temp(dst);
-    temps[dst].mask = temps[src].mask;
+    mask = temps[src].mask;
+    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
+        /* High bits of the destination are now garbage.  */
+        mask |= ~0xffffffffull;
+    }
+    temps[dst].mask = mask;
+
     assert(temps[src].state != TCG_TEMP_CONST);
 
     if (s->temps[src].type == s->temps[dst].type) {
@@ -168,32 +224,30 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
         temps[src].next_copy = dst;
     }
 
-    gen_args[0] = dst;
-    gen_args[1] = src;
+    args[0] = dst;
+    args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
+                             TCGOpcode old_op, TCGArg dst, TCGArg val)
 {
+    TCGOpcode new_op = op_to_movi(old_op);
+    tcg_target_ulong mask;
+
+    op->opc = new_op;
+
     reset_temp(dst);
     temps[dst].state = TCG_TEMP_CONST;
     temps[dst].val = val;
-    temps[dst].mask = val;
-    gen_args[0] = dst;
-    gen_args[1] = val;
-}
-
-static TCGOpcode op_to_mov(TCGOpcode op)
-{
-    switch (op_bits(op)) {
-    case 32:
-        return INDEX_op_mov_i32;
-    case 64:
-        return INDEX_op_mov_i64;
-    default:
-        fprintf(stderr, "op_to_mov: unexpected return value of "
-                "function op_bits.\n");
-        tcg_abort();
+    mask = val;
+    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
+        /* High bits of the destination are now garbage.  */
+        mask |= ~0xffffffffull;
     }
+    temps[dst].mask = mask;
+
+    args[0] = dst;
+    args[1] = val;
 }
 
 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
@@ -510,15 +564,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 }
 
 /* Propagate constants and copies, fold constant expressions. */
-static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
-                                    TCGArg *args, TCGOpDef *tcg_op_defs)
+static void tcg_constant_folding(TCGContext *s)
 {
-    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
-    tcg_target_ulong mask, affected;
-    TCGOpcode op;
-    const TCGOpDef *def;
-    TCGArg *gen_args;
-    TCGArg tmp;
+    int oi, oi_next, nb_temps, nb_globals;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -529,30 +577,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     nb_globals = s->nb_globals;
     reset_all_temps(nb_temps);
 
-    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
-    gen_args = args;
-    for (op_index = 0; op_index < nb_ops; op_index++) {
-        op = s->gen_opc_buf[op_index];
-        def = &tcg_op_defs[op];
-        /* Do copy propagation */
-        if (op == INDEX_op_call) {
-            int nb_oargs = args[0] >> 16;
-            int nb_iargs = args[0] & 0xffff;
-            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
-                if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = find_better_copy(s, args[i]);
-                }
-            }
+    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+        tcg_target_ulong mask, partmask, affected;
+        int nb_oargs, nb_iargs, i;
+        TCGArg tmp;
+
+        TCGOp * const op = &s->gen_op_buf[oi];
+        TCGArg * const args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+        const TCGOpDef *def = &tcg_op_defs[opc];
+
+        oi_next = op->next;
+        if (opc == INDEX_op_call) {
+            nb_oargs = op->callo;
+            nb_iargs = op->calli;
         } else {
-            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
-                if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = find_better_copy(s, args[i]);
-                }
+            nb_oargs = def->nb_oargs;
+            nb_iargs = def->nb_iargs;
+        }
+
+        /* Do copy propagation */
+        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+            if (temps[args[i]].state == TCG_TEMP_COPY) {
+                args[i] = find_better_copy(s, args[i]);
             }
         }
 
         /* For commutative operations make constant second argument */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(mul):
         CASE_OP_32_64(and):
@@ -610,7 +662,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
            and "sub r, 0, a => neg r, a" case.  */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(shl):
         CASE_OP_32_64(shr):
         CASE_OP_32_64(sar):
@@ -618,10 +670,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(rotr):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
-                args += 3;
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -634,7 +683,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     /* Proceed with possible constant folding. */
                     break;
                 }
-                if (op == INDEX_op_sub_i32) {
+                if (opc == INDEX_op_sub_i32) {
                     neg_op = INDEX_op_neg_i32;
                     have_neg = TCG_TARGET_HAS_neg_i32;
                 } else {
@@ -646,12 +695,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
                 if (temps[args[1]].state == TCG_TEMP_CONST
                     && temps[args[1]].val == 0) {
-                    s->gen_opc_buf[op_index] = neg_op;
+                    op->opc = neg_op;
                     reset_temp(args[0]);
-                    gen_args[0] = args[0];
-                    gen_args[1] = args[2];
-                    args += 3;
-                    gen_args += 2;
+                    args[1] = args[2];
                     continue;
                 }
             }
@@ -705,12 +751,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 if (!have_not) {
                     break;
                 }
-                s->gen_opc_buf[op_index] = not_op;
+                op->opc = not_op;
                 reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[i];
-                args += 3;
-                gen_args += 2;
+                args[1] = args[i];
                 continue;
             }
         default:
@@ -718,7 +761,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, const => mov r, a" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(shl):
@@ -746,13 +789,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         do_mov3:
             if (temps_are_copies(args[0], args[1])) {
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
             } else {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
-                gen_args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
             }
-            args += 3;
             continue;
         default:
             break;
@@ -762,7 +802,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            output argument is supported. */
         mask = -1;
         affected = -1;
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(ext8s):
             if ((temps[args[1]].mask & 0x80) != 0) {
                 break;
@@ -858,6 +898,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
 
         CASE_OP_32_64(setcond):
+        case INDEX_op_setcond2_i32:
             mask = 1;
             break;
 
@@ -866,23 +907,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
 
         CASE_OP_32_64(ld8u):
-        case INDEX_op_qemu_ld8u:
             mask = 0xff;
             break;
         CASE_OP_32_64(ld16u):
-        case INDEX_op_qemu_ld16u:
             mask = 0xffff;
             break;
         case INDEX_op_ld32u_i64:
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-#endif
             mask = 0xffffffffu;
             break;
 
         CASE_OP_32_64(qemu_ld):
             {
-                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
+                TCGMemOp mop = args[nb_oargs + nb_iargs];
                 if (!(mop & MO_SIGN)) {
                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                 }
@@ -893,49 +929,43 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         }
 
-        /* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
-           results */
-        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
-            mask &= 0xffffffffu;
+        /* 32-bit ops generate 32-bit results.  For the result is zero test
+           below, we can ignore high bits, but for further optimizations we
+           need to record that the high bits contain garbage.  */
+        partmask = mask;
+        if (!(def->flags & TCG_OPF_64BIT)) {
+            mask |= ~(tcg_target_ulong)0xffffffffu;
+            partmask &= 0xffffffffu;
+            affected &= 0xffffffffu;
         }
 
-        if (mask == 0) {
-            assert(def->nb_oargs == 1);
-            s->gen_opc_buf[op_index] = op_to_movi(op);
-            tcg_opt_gen_movi(gen_args, args[0], 0);
-            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
-            gen_args += 2;
+        if (partmask == 0) {
+            assert(nb_oargs == 1);
+            tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
             continue;
         }
         if (affected == 0) {
-            assert(def->nb_oargs == 1);
+            assert(nb_oargs == 1);
             if (temps_are_copies(args[0], args[1])) {
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
             } else if (temps[args[1]].state != TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_mov(op);
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
-                gen_args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
             } else {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc,
+                                 args[0], temps[args[1]].val);
             }
-            args += def->nb_iargs + 1;
             continue;
         }
 
         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
         CASE_OP_32_64(muluh):
         CASE_OP_32_64(mulsh):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
-                args += 3;
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -944,18 +974,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, a => mov r, a" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 } else {
-                    s->gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
-                    gen_args += 2;
+                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                 }
-                args += 3;
                 continue;
             }
             break;
@@ -964,15 +991,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, a => movi r, 0" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(andc):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(xor):
             if (temps_are_copies(args[1], args[2])) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0);
-                gen_args += 2;
-                args += 3;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -983,29 +1007,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         /* Propagate constants through copy operations and do constant
            folding.  Constants will be substituted to arguments by register
            allocator where needed and possible.  Also detect copies. */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(mov):
             if (temps_are_copies(args[0], args[1])) {
-                args += 2;
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
-                gen_args += 2;
-                args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                 break;
             }
             /* Source argument is constant.  Rewrite the operation and
                let movi case handle it. */
-            op = op_to_movi(op);
-            s->gen_opc_buf[op_index] = op;
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1]);
-            gen_args += 2;
-            args += 2;
+            tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
             break;
 
         CASE_OP_32_64(not):
@@ -1017,22 +1034,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
-                args += 2;
+                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         case INDEX_op_trunc_shr_i32:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
-                args += 3;
+                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1061,12 +1072,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(remu):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tmp = do_constant_folding(op, temps[args[1]].val,
+                tmp = do_constant_folding(opc, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
-                args += 3;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1074,58 +1082,46 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                 temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
-                args += 5;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(setcond):
-            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
             if (tmp != 2) {
-                s->gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
-                args += 4;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(brcond):
-            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
             if (tmp != 2) {
                 if (tmp) {
                     reset_all_temps(nb_temps);
-                    s->gen_opc_buf[op_index] = INDEX_op_br;
-                    gen_args[0] = args[3];
-                    gen_args += 1;
+                    op->opc = INDEX_op_br;
+                    args[0] = args[3];
                 } else {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 }
-                args += 4;
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(movcond):
-            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
             if (tmp != 2) {
                 if (temps_are_copies(args[0], args[4-tmp])) {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
-                    s->gen_opc_buf[op_index] = op_to_movi(op);
-                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
-                    gen_args += 2;
+                    tcg_opt_gen_movi(s, op, args, opc,
+                                     args[0], temps[args[4-tmp]].val);
                 } else {
-                    s->gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
-                    gen_args += 2;
+                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
                 }
-                args += 6;
                 break;
             }
             goto do_default;
@@ -1143,24 +1139,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 uint64_t a = ((uint64_t)ah << 32) | al;
                 uint64_t b = ((uint64_t)bh << 32) | bl;
                 TCGArg rl, rh;
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
-                if (op == INDEX_op_add2_i32) {
+                if (opc == INDEX_op_add2_i32) {
                     a += b;
                 } else {
                     a -= b;
                 }
 
-                /* We emit the extra nop when we emit the add2/sub2.  */
-                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
-
                 rl = args[0];
                 rh = args[1];
-                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
-                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
-                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
-                gen_args += 4;
-                args += 6;
+                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
+                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
+
+                /* We've done all we need to do with the movi.  Skip it.  */
+                oi_next = op2->next;
                 break;
             }
             goto do_default;
@@ -1172,18 +1166,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 uint32_t b = temps[args[3]].val;
                 uint64_t r = (uint64_t)a * b;
                 TCGArg rl, rh;
-
-                /* We emit the extra nop when we emit the mulu2.  */
-                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
                 rl = args[0];
                 rh = args[1];
-                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
-                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
-                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
-                gen_args += 4;
-                args += 4;
+                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
+                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));
+
+                /* We've done all we need to do with the movi.  Skip it.  */
+                oi_next = op2->next;
                 break;
             }
             goto do_default;
@@ -1192,12 +1184,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
             if (tmp != 2) {
                 if (tmp) {
+            do_brcond_true:
                     reset_all_temps(nb_temps);
-                    s->gen_opc_buf[op_index] = INDEX_op_br;
-                    gen_args[0] = args[5];
-                    gen_args += 1;
+                    op->opc = INDEX_op_br;
+                    args[0] = args[5];
                 } else {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+            do_brcond_false:
+                    tcg_op_remove(s, op);
                 }
             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                        && temps[args[2]].state == TCG_TEMP_CONST
@@ -1206,25 +1199,64 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                        && temps[args[3]].val == 0) {
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
+            do_brcond_high:
                 reset_all_temps(nb_temps);
-                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
-                gen_args[0] = args[1];
-                gen_args[1] = args[3];
-                gen_args[2] = args[4];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                op->opc = INDEX_op_brcond_i32;
+                args[0] = args[1];
+                args[1] = args[3];
+                args[2] = args[4];
+                args[3] = args[5];
+            } else if (args[4] == TCG_COND_EQ) {
+                /* Simplify EQ comparisons where one of the pairs
+                   can be simplified.  */
+                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+                                               args[0], args[2], TCG_COND_EQ);
+                if (tmp == 0) {
+                    goto do_brcond_false;
+                } else if (tmp == 1) {
+                    goto do_brcond_high;
+                }
+                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+                                               args[1], args[3], TCG_COND_EQ);
+                if (tmp == 0) {
+                    goto do_brcond_false;
+                } else if (tmp != 1) {
+                    goto do_default;
+                }
+            do_brcond_low:
+                reset_all_temps(nb_temps);
+                op->opc = INDEX_op_brcond_i32;
+                args[1] = args[2];
+                args[2] = args[4];
+                args[3] = args[5];
+            } else if (args[4] == TCG_COND_NE) {
+                /* Simplify NE comparisons where one of the pairs
+                   can be simplified.  */
+                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+                                               args[0], args[2], TCG_COND_NE);
+                if (tmp == 0) {
+                    goto do_brcond_high;
+                } else if (tmp == 1) {
+                    goto do_brcond_true;
+                }
+                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
+                                               args[1], args[3], TCG_COND_NE);
+                if (tmp == 0) {
+                    goto do_brcond_low;
+                } else if (tmp == 1) {
+                    goto do_brcond_true;
+                }
+                goto do_default;
             } else {
                 goto do_default;
             }
-            args += 6;
             break;
 
         case INDEX_op_setcond2_i32:
             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
             if (tmp != 2) {
-                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
-                tcg_opt_gen_movi(gen_args, args[0], tmp);
-                gen_args += 2;
+            do_setcond_const:
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                        && temps[args[3]].state == TCG_TEMP_CONST
                        && temps[args[4]].state == TCG_TEMP_CONST
@@ -1232,38 +1264,67 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                        && temps[args[4]].val == 0) {
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
-                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+            do_setcond_high:
+                reset_temp(args[0]);
+                temps[args[0]].mask = 1;
+                op->opc = INDEX_op_setcond_i32;
+                args[1] = args[2];
+                args[2] = args[4];
+                args[3] = args[5];
+            } else if (args[5] == TCG_COND_EQ) {
+                /* Simplify EQ comparisons where one of the pairs
+                   can be simplified.  */
+                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+                                               args[1], args[3], TCG_COND_EQ);
+                if (tmp == 0) {
+                    goto do_setcond_const;
+                } else if (tmp == 1) {
+                    goto do_setcond_high;
+                }
+                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+                                               args[2], args[4], TCG_COND_EQ);
+                if (tmp == 0) {
+                    goto do_setcond_high;
+                } else if (tmp != 1) {
+                    goto do_default;
+                }
+            do_setcond_low:
                 reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[2];
-                gen_args[2] = args[4];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                temps[args[0]].mask = 1;
+                op->opc = INDEX_op_setcond_i32;
+                args[2] = args[3];
+                args[3] = args[5];
+            } else if (args[5] == TCG_COND_NE) {
+                /* Simplify NE comparisons where one of the pairs
+                   can be simplified.  */
+                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+                                               args[1], args[3], TCG_COND_NE);
+                if (tmp == 0) {
+                    goto do_setcond_high;
+                } else if (tmp == 1) {
+                    goto do_setcond_const;
+                }
+                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
+                                               args[2], args[4], TCG_COND_NE);
+                if (tmp == 0) {
+                    goto do_setcond_low;
+                } else if (tmp == 1) {
+                    goto do_setcond_const;
+                }
+                goto do_default;
             } else {
                 goto do_default;
             }
-            args += 6;
             break;
 
         case INDEX_op_call:
-            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
-            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
-                                            TCG_CALL_NO_WRITE_GLOBALS))) {
+            if (!(args[nb_oargs + nb_iargs + 1]
+                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                 for (i = 0; i < nb_globals; i++) {
                     reset_temp(i);
                 }
             }
-            for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1]);
-            }
-            i = nb_call_args + 3;
-            while (i) {
-                *gen_args = *args;
-                args++;
-                gen_args++;
-                i--;
-            }
-            break;
+            goto do_reset_output;
 
         default:
         do_default:
@@ -1275,7 +1336,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (def->flags & TCG_OPF_BB_END) {
                 reset_all_temps(nb_temps);
             } else {
-                for (i = 0; i < def->nb_oargs; i++) {
+        do_reset_output:
+                for (i = 0; i < nb_oargs; i++) {
                     reset_temp(args[i]);
                     /* Save the corresponding known-zero bits mask for the
                        first output argument (only one supported so far). */
@@ -1284,22 +1346,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     }
                 }
             }
-            for (i = 0; i < def->nb_args; i++) {
-                gen_args[i] = args[i];
-            }
-            args += def->nb_args;
-            gen_args += def->nb_args;
             break;
         }
     }
-
-    return gen_args;
 }
 
-TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
-        TCGArg *args, TCGOpDef *tcg_op_defs)
+void tcg_optimize(TCGContext *s)
 {
-    TCGArg *res;
-    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
-    return res;
+    tcg_constant_folding(s);
 }