git.proxmox.com Git - qemu.git/blobdiff - target-sh4/translate.c
SH4: convert a few helpers to TCG
[qemu.git] / target-sh4 / translate.c
index bb6786a03bd829c7356534f179af5d02c608ca66..19486d7a1f96917cda30e6cb60333257b7f4a29b 100644 (file)
@@ -31,6 +31,7 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "disas.h"
+#include "helper.h"
 #include "tcg-op.h"
 #include "qemu-common.h"
 
@@ -56,6 +57,86 @@ enum {
     BS_EXCP     = 3, /* We reached an exception condition */
 };
 
+/* global register indexes: TCG handles for env and the CPUState registers set up in sh4_translate_init() */
+static TCGv cpu_env;
+static TCGv cpu_gregs[24];
+static TCGv cpu_pc, cpu_sr, cpu_ssr, cpu_spc, cpu_gbr;
+static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
+static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
+
+/* internal register indexes: CPUState flags and delayed_pc (registered as "_flags_" and "_delayed_pc_") */
+static TCGv cpu_flags, cpu_delayed_pc;
+
+/* dyngen register indexes: T0/T1, bound to TCG_AREG1/TCG_AREG2 in sh4_translate_init() */
+static TCGv cpu_T[2];
+
+#include "gen-icount.h"
+
+static void sh4_translate_init(void) /* create the TCG globals and register the helpers; only the first call does any work */
+{
+    int i;
+    static int done_init = 0; /* set to 1 at the end so that later calls return early */
+    static const char * const gregnames[24] = { /* names given to the gregs[0..23] globals, in index order */
+        "R0_BANK0", "R1_BANK0", "R2_BANK0", "R3_BANK0",
+        "R4_BANK0", "R5_BANK0", "R6_BANK0", "R7_BANK0",
+        "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
+        "R0_BANK1", "R1_BANK1", "R2_BANK1", "R3_BANK1",
+        "R4_BANK1", "R5_BANK1", "R6_BANK1", "R7_BANK1"
+    };
+
+    if (done_init) /* globals were already created by an earlier call */
+        return;
+
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env"); /* env is bound to register TCG_AREG0 */
+    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0"); /* T0/T1 are register globals as well */
+    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
+
+    for (i = 0; i < 24; i++) /* one global per CPUState gregs[] slot, addressed relative to TCG_AREG0 */
+        cpu_gregs[i] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                          offsetof(CPUState, gregs[i]),
+                                          gregnames[i]);
+
+    cpu_pc = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, /* remaining globals: one per named CPUState field */
+                                offsetof(CPUState, pc), "PC");
+    cpu_sr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                offsetof(CPUState, sr), "SR");
+    cpu_ssr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, ssr), "SSR");
+    cpu_spc = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, spc), "SPC");
+    cpu_gbr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, gbr), "GBR");
+    cpu_vbr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, vbr), "VBR");
+    cpu_sgr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, sgr), "SGR");
+    cpu_dbr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                 offsetof(CPUState, dbr), "DBR");
+    cpu_mach = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                  offsetof(CPUState, mach), "MACH");
+    cpu_macl = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                  offsetof(CPUState, macl), "MACL");
+    cpu_pr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                offsetof(CPUState, pr), "PR");
+    cpu_fpscr = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                   offsetof(CPUState, fpscr), "FPSCR");
+    cpu_fpul = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                  offsetof(CPUState, fpul), "FPUL");
+
+    cpu_flags = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, /* the two "internal" globals: flags and delayed_pc */
+                                  offsetof(CPUState, flags), "_flags_");
+    cpu_delayed_pc = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+                                       offsetof(CPUState, delayed_pc),
+                                       "_delayed_pc_");
+
+    /* register helpers: re-include helper.h so each DEF_HELPER() use becomes a tcg_register_helper(name, #name) call */
+#undef DEF_HELPER
+#define DEF_HELPER(ret, name, params) tcg_register_helper(name, #name);
+#include "helper.h"
+
+    done_init = 1; /* mark initialisation complete */
+}
+
 #ifdef CONFIG_USER_ONLY
 
 #define GEN_OP_LD(width, reg) \
@@ -102,6 +183,10 @@ void cpu_dump_state(CPUState * env, FILE * f,
     int i;
     cpu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
                env->pc, env->sr, env->pr, env->fpscr);
+    cpu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
+               env->spc, env->ssr, env->gbr, env->vbr);
+    cpu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
+               env->sgr, env->dbr, env->delayed_pc, env->fpul);
     for (i = 0; i < 24; i += 4) {
        cpu_fprintf(f, "r%d=0x%08x r%d=0x%08x r%d=0x%08x r%d=0x%08x\n",
                    i, env->gregs[i], i + 1, env->gregs[i + 1],
@@ -143,6 +228,7 @@ CPUSH4State *cpu_sh4_init(const char *cpu_model)
     if (!env)
        return NULL;
     cpu_exec_init(env);
+    sh4_translate_init();
     cpu_sh4_reset(env);
     tlb_flush(env, 1);
     return env;
@@ -157,12 +243,12 @@ static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest)
        !ctx->singlestep_enabled) {
        /* Use a direct jump if in same page and singlestep not enabled */
         tcg_gen_goto_tb(n);
-        gen_op_movl_imm_PC(dest);
+        tcg_gen_movi_i32(cpu_pc, dest);
         tcg_gen_exit_tb((long) tb + n);
     } else {
-        gen_op_movl_imm_PC(dest);
+        tcg_gen_movi_i32(cpu_pc, dest);
         if (ctx->singlestep_enabled)
-            gen_op_debug();
+            tcg_gen_helper_0_0(helper_debug);
         tcg_gen_exit_tb(0);
     }
 }
@@ -172,15 +258,25 @@ static void gen_jump(DisasContext * ctx)
     if (ctx->delayed_pc == (uint32_t) - 1) {
        /* Target is not statically known, it comes necessarily from a
           delayed jump as immediate jump are conditinal jumps */
-       gen_op_movl_delayed_pc_PC();
+       tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
        if (ctx->singlestep_enabled)
-           gen_op_debug();
+           tcg_gen_helper_0_0(helper_debug);
        tcg_gen_exit_tb(0);
     } else {
        gen_goto_tb(ctx, 0, ctx->delayed_pc);
     }
 }
 
+static inline void gen_branch_slot(uint32_t delayed_pc, int t) /* store the target; set DELAY_SLOT_TRUE in flags when SR_T matches t */
+{
+    int label = gen_new_label(); /* jumped to when the flag update must be skipped */
+    tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc); /* target is stored whether or not the condition holds */
+    tcg_gen_andi_i32(cpu_T[0], cpu_sr, SR_T); /* T0 = sr & SR_T (overwrites T0) */
+    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], t ? SR_T : 0, label); /* t != 0 wants the bit set, t == 0 wants it clear */
+    tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); /* reached only when the T bit matched t */
+    gen_set_label(label);
+}
+
 /* Immediate conditional jump (bt or bf) */
 static void gen_conditional_jump(DisasContext * ctx,
                                 target_ulong ift, target_ulong ifnott)
@@ -188,7 +284,8 @@ static void gen_conditional_jump(DisasContext * ctx,
     int l1;
 
     l1 = gen_new_label();
-    gen_op_jT(l1);
+    tcg_gen_andi_i32(cpu_T[0], cpu_sr, SR_T);
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_T[0], SR_T, l1);
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
     gen_goto_tb(ctx, 1, ift);
@@ -200,12 +297,54 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     int l1;
 
     l1 = gen_new_label();
-    gen_op_jdelayed(l1);
+    tcg_gen_andi_i32(cpu_T[0], cpu_flags, DELAY_SLOT_TRUE);
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_T[0], DELAY_SLOT_TRUE, l1);
     gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
+    tcg_gen_andi_i32(cpu_flags, cpu_flags, ~DELAY_SLOT_TRUE);
     gen_jump(ctx);
 }
 
+static inline void gen_set_t(void) /* emit an op that sets the SR_T bit in cpu_sr */
+{
+    tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T); /* sr |= SR_T; other sr bits are kept */
+}
+
+static inline void gen_clr_t(void) /* emit an op that clears the SR_T bit in cpu_sr */
+{
+    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); /* sr &= ~SR_T; other sr bits are kept */
+}
+
+static inline void gen_cmp(int cond, TCGv t0, TCGv t1) /* set SR_T when "t1 cond t0" holds, clear it otherwise */
+{
+    int label1 = gen_new_label(); /* condition-true path */
+    int label2 = gen_new_label(); /* join point after either path */
+    tcg_gen_brcond_i32(cond, t1, t0, label1); /* note the order: t1 is passed first, t0 second */
+    gen_clr_t(); /* fall-through: branch not taken */
+    tcg_gen_br(label2);
+    gen_set_label(label1);
+    gen_set_t(); /* branch taken */
+    gen_set_label(label2);
+}
+
+static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm) /* set SR_T when "t0 cond imm" holds, clear it otherwise */
+{
+    int label1 = gen_new_label(); /* condition-true path */
+    int label2 = gen_new_label(); /* join point after either path */
+    tcg_gen_brcondi_i32(cond, t0, imm, label1); /* compares t0 against the constant imm */
+    gen_clr_t(); /* fall-through: branch not taken */
+    tcg_gen_br(label2);
+    gen_set_label(label1);
+    gen_set_t(); /* branch taken */
+    gen_set_label(label2);
+}
+
+static inline void gen_store_flags(uint32_t flags) /* emit: cpu_flags = (cpu_flags & DELAY_SLOT_TRUE) | flags */
+{
+    tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); /* keep only the DELAY_SLOT_TRUE bit of the current value */
+    tcg_gen_ori_i32(cpu_flags, cpu_flags, flags); /* then merge in the constant flags argument */
+}
+
 #define B3_0 (ctx->opcode & 0xf)
 #define B6_4 ((ctx->opcode >> 4) & 0x7)
 #define B7_4 ((ctx->opcode >> 4) & 0xf)
@@ -229,7 +368,7 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
 
 #define CHECK_NOT_DELAY_SLOT \
   if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) \
-  {gen_op_raise_slot_illegal_instruction (); ctx->bstate = BS_EXCP; \
+  {tcg_gen_helper_0_0(helper_raise_slot_illegal_instruction); ctx->bstate = BS_EXCP; \
    return;}
 
 void _decode_opc(DisasContext * ctx)
@@ -239,35 +378,43 @@ void _decode_opc(DisasContext * ctx)
 #endif
     switch (ctx->opcode) {
     case 0x0019:               /* div0u */
-       gen_op_div0u();
+       tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(SR_M | SR_Q | SR_T));
        return;
     case 0x000b:               /* rts */
-       CHECK_NOT_DELAY_SLOT gen_op_rts();
+       CHECK_NOT_DELAY_SLOT
+       tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
     case 0x0028:               /* clrmac */
-       gen_op_clrmac();
+       tcg_gen_movi_i32(cpu_mach, 0);
+       tcg_gen_movi_i32(cpu_macl, 0);
        return;
     case 0x0048:               /* clrs */
-       gen_op_clrs();
+       tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S);
        return;
     case 0x0008:               /* clrt */
-       gen_op_clrt();
+       gen_clr_t();
        return;
     case 0x0038:               /* ldtlb */
+#if defined(CONFIG_USER_ONLY)
        assert(0);              /* XXXXX */
+#else
+       tcg_gen_helper_0_0(helper_ldtlb);
+#endif
        return;
     case 0x002b:               /* rte */
-       CHECK_NOT_DELAY_SLOT gen_op_rte();
+       CHECK_NOT_DELAY_SLOT
+       tcg_gen_mov_i32(cpu_sr, cpu_ssr);
+       tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
     case 0x0058:               /* sets */
-       gen_op_sets();
+       tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S);
        return;
     case 0x0018:               /* sett */
-       gen_op_sett();
+       gen_set_t();
        return;
     case 0xfbfd:               /* frchg */
        gen_op_frchg();
@@ -280,355 +427,388 @@ void _decode_opc(DisasContext * ctx)
     case 0x0009:               /* nop */
        return;
     case 0x001b:               /* sleep */
-       assert(0);              /* XXXXX */
+       if (ctx->memidx) {
+               tcg_gen_helper_0_0(helper_sleep);
+       } else {
+               tcg_gen_helper_0_0(helper_raise_illegal_instruction);
+               ctx->bstate = BS_EXCP;
+       }
        return;
     }
 
     switch (ctx->opcode & 0xf000) {
     case 0x1000:               /* mov.l Rm,@(disp,Rn) */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_addl_imm_T1(B3_0 * 4);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_addi_i32(cpu_T[1], cpu_T[1], B3_0 * 4);
        gen_op_stl_T0_T1(ctx);
        return;
     case 0x5000:               /* mov.l @(disp,Rm),Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_addl_imm_T0(B3_0 * 4);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B3_0 * 4);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0xe000:               /* mov #imm,Rn */
-       gen_op_movl_imm_rN(B7_0s, REG(B11_8));
+       tcg_gen_movi_i32(cpu_gregs[REG(B11_8)], B7_0s);
        return;
     case 0x9000:               /* mov.w @(disp,PC),Rn */
-       gen_op_movl_imm_T0(ctx->pc + 4 + B7_0 * 2);
+       tcg_gen_movi_i32(cpu_T[0], ctx->pc + 4 + B7_0 * 2);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0xd000:               /* mov.l @(disp,PC),Rn */
-       gen_op_movl_imm_T0((ctx->pc + 4 + B7_0 * 4) & ~3);
+       tcg_gen_movi_i32(cpu_T[0], (ctx->pc + 4 + B7_0 * 4) & ~3);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x7000:               /* add #imm,Rn */
-       gen_op_add_imm_rN(B7_0s, REG(B11_8));
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], B7_0s);
        return;
     case 0xa000:               /* bra disp */
        CHECK_NOT_DELAY_SLOT
-           gen_op_bra(ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2);
+       ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+       tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
        ctx->flags |= DELAY_SLOT;
        return;
     case 0xb000:               /* bsr disp */
        CHECK_NOT_DELAY_SLOT
-           gen_op_bsr(ctx->pc + 4, ctx->delayed_pc =
-                      ctx->pc + 4 + B11_0s * 2);
+       tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+       ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+       tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
        ctx->flags |= DELAY_SLOT;
        return;
     }
 
     switch (ctx->opcode & 0xf00f) {
     case 0x6003:               /* mov Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x2000:               /* mov.b Rm,@Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0x2001:               /* mov.w Rm,@Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_stw_T0_T1(ctx);
        return;
     case 0x2002:               /* mov.l Rm,@Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_stl_T0_T1(ctx);
        return;
     case 0x6000:               /* mov.b @Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldb_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x6001:               /* mov.w @Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x6002:               /* mov.l @Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x2004:               /* mov.b Rm,@-Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_dec1_rN(REG(B11_8));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_stb_T0_T1(ctx);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 1);     /* modify register status */
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 1);     /* recover register status */
+       gen_op_stb_T0_T1(ctx);                          /* might cause re-execution */
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 1);     /* modify register status */
        return;
     case 0x2005:               /* mov.w Rm,@-Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_dec2_rN(REG(B11_8));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 2);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 2);
        gen_op_stw_T0_T1(ctx);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 2);
        return;
     case 0x2006:               /* mov.l Rm,@-Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_dec4_rN(REG(B11_8));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 4);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 4);
        gen_op_stl_T0_T1(ctx);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],
+                        cpu_gregs[REG(B11_8)], 4);
        return;
     case 0x6004:               /* mov.b @Rm+,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldb_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        if ( B11_8 != B7_4 )
-               gen_op_inc1_rN(REG(B7_4));
+               tcg_gen_addi_i32(cpu_gregs[REG(B7_4)],
+                                cpu_gregs[REG(B7_4)], 1);
        return;
     case 0x6005:               /* mov.w @Rm+,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        if ( B11_8 != B7_4 )
-               gen_op_inc2_rN(REG(B7_4));
+               tcg_gen_addi_i32(cpu_gregs[REG(B7_4)],
+                                cpu_gregs[REG(B7_4)], 2);
        return;
     case 0x6006:               /* mov.l @Rm+,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        if ( B11_8 != B7_4 )
-               gen_op_inc4_rN(REG(B7_4));
+               tcg_gen_addi_i32(cpu_gregs[REG(B7_4)],
+                                cpu_gregs[REG(B7_4)], 4);
        return;
     case 0x0004:               /* mov.b Rm,@(R0,Rn) */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_add_rN_T1(REG(0));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_add_i32(cpu_T[1], cpu_T[1], cpu_gregs[REG(0)]);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0x0005:               /* mov.w Rm,@(R0,Rn) */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_add_rN_T1(REG(0));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_add_i32(cpu_T[1], cpu_T[1], cpu_gregs[REG(0)]);
        gen_op_stw_T0_T1(ctx);
        return;
     case 0x0006:               /* mov.l Rm,@(R0,Rn) */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_add_rN_T1(REG(0));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_add_i32(cpu_T[1], cpu_T[1], cpu_gregs[REG(0)]);
        gen_op_stl_T0_T1(ctx);
        return;
     case 0x000c:               /* mov.b @(R0,Rm),Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_add_rN_T0(REG(0));
+       tcg_gen_add_i32(cpu_T[0], cpu_gregs[REG(B7_4)], cpu_gregs[REG(0)]);
        gen_op_ldb_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x000d:               /* mov.w @(R0,Rm),Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_add_rN_T0(REG(0));
+       tcg_gen_add_i32(cpu_T[0], cpu_gregs[REG(B7_4)], cpu_gregs[REG(0)]);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x000e:               /* mov.l @(R0,Rm),Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_add_rN_T0(REG(0));
+       tcg_gen_add_i32(cpu_T[0], cpu_gregs[REG(B7_4)], cpu_gregs[REG(0)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x6008:               /* swap.b Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_swapb_T0();
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_andi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)], 0xffff0000);
+       tcg_gen_andi_i32(cpu_T[0], cpu_gregs[REG(B7_4)], 0xff);
+       tcg_gen_shli_i32(cpu_T[0], cpu_T[0], 8);
+       tcg_gen_or_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_T[0]);
+       tcg_gen_shri_i32(cpu_T[0], cpu_gregs[REG(B7_4)], 8);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xff);
+       tcg_gen_or_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x6009:               /* swap.w Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_swapw_T0();
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_andi_i32(cpu_T[0], cpu_gregs[REG(B7_4)], 0xffff);
+       tcg_gen_shli_i32(cpu_T[0], cpu_T[0], 16);
+       tcg_gen_shri_i32(cpu_T[1], cpu_gregs[REG(B7_4)], 16);
+       tcg_gen_andi_i32(cpu_T[1], cpu_T[1], 0xffff);
+       tcg_gen_or_i32(cpu_gregs[REG(B11_8)], cpu_T[0], cpu_T[1]);
        return;
     case 0x200d:               /* xtrct Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_xtrct_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_andi_i32(cpu_T[0], cpu_gregs[REG(B7_4)], 0xffff);
+       tcg_gen_shli_i32(cpu_T[0], cpu_T[0], 16);
+       tcg_gen_shri_i32(cpu_T[1], cpu_gregs[REG(B11_8)], 16);
+       tcg_gen_andi_i32(cpu_T[1], cpu_T[1], 0xffff);
+       tcg_gen_or_i32(cpu_gregs[REG(B11_8)], cpu_T[0], cpu_T[1]);
        return;
     case 0x300c:               /* add Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_add_T0_rN(REG(B11_8));
+       tcg_gen_add_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x300e:               /* addc Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_addc_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x300f:               /* addv Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_addv_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x2009:               /* and Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_and_T0_rN(REG(B11_8));
+       tcg_gen_and_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x3000:               /* cmp/eq Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_cmp_eq_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       gen_cmp(TCG_COND_EQ, cpu_T[0], cpu_T[1]);
        return;
     case 0x3003:               /* cmp/ge Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_cmp_ge_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       gen_cmp(TCG_COND_GE, cpu_T[0], cpu_T[1]);
        return;
     case 0x3007:               /* cmp/gt Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_cmp_gt_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       gen_cmp(TCG_COND_GT, cpu_T[0], cpu_T[1]);
        return;
     case 0x3006:               /* cmp/hi Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_cmp_hi_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       gen_cmp(TCG_COND_GTU, cpu_T[0], cpu_T[1]);
        return;
     case 0x3002:               /* cmp/hs Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_cmp_hs_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       gen_cmp(TCG_COND_GEU, cpu_T[0], cpu_T[1]);
        return;
     case 0x200c:               /* cmp/str Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_cmp_str_T0_T1();
        return;
     case 0x2007:               /* div0s Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_div0s_T0_T1();
        return;
     case 0x3004:               /* div1 Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_div1_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x300d:               /* dmuls.l Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_dmulsl_T0_T1();
        return;
     case 0x3005:               /* dmulu.l Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_dmulul_T0_T1();
        return;
     case 0x600e:               /* exts.b Rm,Rn */
-       gen_op_movb_rN_T0(REG(B7_4));
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xff);
+       tcg_gen_ext8s_i32(cpu_T[0], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x600f:               /* exts.w Rm,Rn */
-       gen_op_movw_rN_T0(REG(B7_4));
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xffff);
+       tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x600c:               /* extu.b Rm,Rn */
-       gen_op_movub_rN_T0(REG(B7_4));
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xff);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x600d:               /* extu.w Rm,Rn */
-       gen_op_movuw_rN_T0(REG(B7_4));
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xffff);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x000f:               /* mac.l @Rm+,@Rn+ */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_T1();
-       gen_op_inc4_rN(REG(B11_8));
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldl_T0_T0(ctx);
        gen_op_macl_T0_T1();
-       gen_op_inc4_rN(REG(B7_4));
+       tcg_gen_addi_i32(cpu_gregs[REG(B7_4)], cpu_gregs[REG(B7_4)], 4);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
        return;
     case 0x400f:               /* mac.w @Rm+,@Rn+ */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_T1();
-       gen_op_inc2_rN(REG(B11_8));
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_ldl_T0_T0(ctx);
        gen_op_macw_T0_T1();
-       gen_op_inc2_rN(REG(B7_4));
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 2);
+       tcg_gen_addi_i32(cpu_gregs[REG(B7_4)], cpu_gregs[REG(B7_4)], 2);
        return;
     case 0x0007:               /* mul.l Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_mull_T0_T1();
        return;
     case 0x200f:               /* muls.w Rm,Rn */
-       gen_op_movw_rN_T0(REG(B7_4));
-       gen_op_movw_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xffff);
+       tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_andi_i32(cpu_T[1], cpu_T[1], 0xffff);
+       tcg_gen_ext16s_i32(cpu_T[1], cpu_T[1]);
        gen_op_mulsw_T0_T1();
        return;
     case 0x200e:               /* mulu.w Rm,Rn */
-       gen_op_movuw_rN_T0(REG(B7_4));
-       gen_op_movuw_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], 0xffff);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_andi_i32(cpu_T[1], cpu_T[1], 0xffff);
        gen_op_muluw_T0_T1();
        return;
     case 0x600b:               /* neg Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_neg_T0();
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_neg_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x600a:               /* negc Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
        gen_op_negc_T0();
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x6007:               /* not Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_not_T0();
-       gen_op_movl_T0_rN(REG(B11_8));
+       tcg_gen_not_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x200b:               /* or Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_or_T0_rN(REG(B11_8));
+       tcg_gen_or_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x400c:               /* shad Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_shad_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x400d:               /* shld Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_shld_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x3008:               /* sub Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_sub_T0_rN(REG(B11_8));
+       tcg_gen_sub_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0x300a:               /* subc Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_subc_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x300b:               /* subv Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_subv_T0_T1();
-       gen_op_movl_T1_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[1]);
        return;
     case 0x2008:               /* tst Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_tst_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1]);
+       gen_cmp_imm(TCG_COND_EQ, cpu_T[0], 0);
        return;
     case 0x200a:               /* xor Rm,Rn */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_xor_T0_rN(REG(B11_8));
+       tcg_gen_xor_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]);
        return;
     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
@@ -642,60 +822,63 @@ void _decode_opc(DisasContext * ctx)
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
            gen_op_fmov_drN_DT0(XREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
            gen_op_stfq_DT0_T1(ctx);
        } else {
            gen_op_fmov_frN_FT0(FREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
            gen_op_stfl_FT0_T1(ctx);
        }
        return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
-           gen_op_movl_rN_T0(REG(B7_4));
+           tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
            gen_op_ldfq_T0_DT0(ctx);
            gen_op_fmov_DT0_drN(XREG(B11_8));
        } else {
-           gen_op_movl_rN_T0(REG(B7_4));
+           tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
            gen_op_ldfl_T0_FT0(ctx);
            gen_op_fmov_FT0_frN(FREG(B11_8));
        }
        return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
-           gen_op_movl_rN_T0(REG(B7_4));
+           tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
            gen_op_ldfq_T0_DT0(ctx);
            gen_op_fmov_DT0_drN(XREG(B11_8));
-           gen_op_inc8_rN(REG(B7_4));
+           tcg_gen_addi_i32(cpu_gregs[REG(B7_4)],
+                            cpu_gregs[REG(B7_4)], 8);
        } else {
-           gen_op_movl_rN_T0(REG(B7_4));
+           tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
            gen_op_ldfl_T0_FT0(ctx);
            gen_op_fmov_FT0_frN(FREG(B11_8));
-           gen_op_inc4_rN(REG(B7_4));
+           tcg_gen_addi_i32(cpu_gregs[REG(B7_4)],
+                            cpu_gregs[REG(B7_4)], 4);
        }
        return;
     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
-           gen_op_dec8_rN(REG(B11_8));
+           tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 8);
            gen_op_fmov_drN_DT0(XREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+           tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 8);
            gen_op_stfq_DT0_T1(ctx);
+           tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 8);
        } else {
-           gen_op_dec4_rN(REG(B11_8));
+           tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
            gen_op_fmov_frN_FT0(FREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+           tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
            gen_op_stfl_FT0_T1(ctx);
+           tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
        }
        return;
     case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
+       tcg_gen_add_i32(cpu_T[0], cpu_gregs[REG(B7_4)], cpu_gregs[REG(0)]);
        if (ctx->fpscr & FPSCR_SZ) {
-           gen_op_movl_rN_T0(REG(B7_4));
-           gen_op_add_rN_T0(REG(0));
            gen_op_ldfq_T0_DT0(ctx);
            gen_op_fmov_DT0_drN(XREG(B11_8));
        } else {
-           gen_op_movl_rN_T0(REG(B7_4));
-           gen_op_add_rN_T0(REG(0));
            gen_op_ldfl_T0_FT0(ctx);
            gen_op_fmov_FT0_frN(FREG(B11_8));
        }
@@ -703,13 +886,13 @@ void _decode_opc(DisasContext * ctx)
     case 0xf007: /* fmov {F,D,X}Rn,@(R0,Rn) - FPSCR: Nothing */
        if (ctx->fpscr & FPSCR_SZ) {
            gen_op_fmov_drN_DT0(XREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
-           gen_op_add_rN_T1(REG(0));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+           tcg_gen_add_i32(cpu_T[1], cpu_T[1], cpu_gregs[REG(0)]);
            gen_op_stfq_DT0_T1(ctx);
        } else {
            gen_op_fmov_frN_FT0(FREG(B7_4));
-           gen_op_movl_rN_T1(REG(B11_8));
-           gen_op_add_rN_T1(REG(0));
+           tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+           tcg_gen_add_i32(cpu_T[1], cpu_T[1], cpu_gregs[REG(0)]);
            gen_op_stfl_FT0_T1(ctx);
        }
        return;
@@ -762,14 +945,14 @@ void _decode_opc(DisasContext * ctx)
 
     switch (ctx->opcode & 0xff00) {
     case 0xc900:               /* and #imm,R0 */
-       gen_op_and_imm_rN(B7_0, REG(0));
+       tcg_gen_andi_i32(cpu_gregs[REG(0)], cpu_gregs[REG(0)], B7_0);
        return;
     case 0xcd00:               /* and.b #imm,@(R0,GBR) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_addl_GBR_T0();
-       gen_op_movl_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_gbr);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
        gen_op_ldub_T0_T0(ctx);
-       gen_op_and_imm_T0(B7_0);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], B7_0);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0x8b00:               /* bf label */
@@ -780,7 +963,7 @@ void _decode_opc(DisasContext * ctx)
        return;
     case 0x8f00:               /* bf/s label */
        CHECK_NOT_DELAY_SLOT
-           gen_op_bf_s(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2);
+       gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
        ctx->flags |= DELAY_SLOT_CONDITIONAL;
        return;
     case 0x8900:               /* bt label */
@@ -791,197 +974,214 @@ void _decode_opc(DisasContext * ctx)
        return;
     case 0x8d00:               /* bt/s label */
        CHECK_NOT_DELAY_SLOT
-           gen_op_bt_s(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2);
+       gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
        ctx->flags |= DELAY_SLOT_CONDITIONAL;
        return;
     case 0x8800:               /* cmp/eq #imm,R0 */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_cmp_eq_imm_T0(B7_0s);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       gen_cmp_imm(TCG_COND_EQ, cpu_T[0], B7_0s);
        return;
     case 0xc400:               /* mov.b @(disp,GBR),R0 */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0);
        gen_op_ldb_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(0));
+       tcg_gen_mov_i32(cpu_gregs[REG(0)], cpu_T[0]);
        return;
     case 0xc500:               /* mov.w @(disp,GBR),R0 */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0 * 2);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0 * 2);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(0));
+       tcg_gen_mov_i32(cpu_gregs[REG(0)], cpu_T[0]);
        return;
     case 0xc600:               /* mov.l @(disp,GBR),R0 */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0 * 4);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0 * 4);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(0));
+       tcg_gen_mov_i32(cpu_gregs[REG(0)], cpu_T[0]);
        return;
     case 0xc000:               /* mov.b R0,@(disp,GBR) */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0);
-       gen_op_movl_T0_T1();
-       gen_op_movl_rN_T0(REG(0));
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0xc100:               /* mov.w R0,@(disp,GBR) */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0 * 2);
-       gen_op_movl_T0_T1();
-       gen_op_movl_rN_T0(REG(0));
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0 * 2);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
        gen_op_stw_T0_T1(ctx);
        return;
     case 0xc200:               /* mov.l R0,@(disp,GBR) */
        gen_op_stc_gbr_T0();
-       gen_op_addl_imm_T0(B7_0 * 4);
-       gen_op_movl_T0_T1();
-       gen_op_movl_rN_T0(REG(0));
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B7_0 * 4);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
        gen_op_stl_T0_T1(ctx);
        return;
     case 0x8000:               /* mov.b R0,@(disp,Rn) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_movl_rN_T1(REG(B7_4));
-       gen_op_addl_imm_T1(B3_0);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B7_4)]);
+       tcg_gen_addi_i32(cpu_T[1], cpu_T[1], B3_0);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0x8100:               /* mov.w R0,@(disp,Rn) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_movl_rN_T1(REG(B7_4));
-       gen_op_addl_imm_T1(B3_0 * 2);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B7_4)]);
+       tcg_gen_addi_i32(cpu_T[1], cpu_T[1], B3_0 * 2);
        gen_op_stw_T0_T1(ctx);
        return;
     case 0x8400:               /* mov.b @(disp,Rn),R0 */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_addl_imm_T0(B3_0);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B3_0);
        gen_op_ldb_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(0));
+       tcg_gen_mov_i32(cpu_gregs[REG(0)], cpu_T[0]);
        return;
     case 0x8500:               /* mov.w @(disp,Rn),R0 */
-       gen_op_movl_rN_T0(REG(B7_4));
-       gen_op_addl_imm_T0(B3_0 * 2);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B7_4)]);
+       tcg_gen_addi_i32(cpu_T[0], cpu_T[0], B3_0 * 2);
        gen_op_ldw_T0_T0(ctx);
-       gen_op_movl_T0_rN(REG(0));
+       tcg_gen_mov_i32(cpu_gregs[REG(0)], cpu_T[0]);
        return;
     case 0xc700:               /* mova @(disp,PC),R0 */
-       gen_op_movl_imm_rN(((ctx->pc & 0xfffffffc) + 4 + B7_0 * 4) & ~3,
-                          REG(0));
+       tcg_gen_movi_i32(cpu_gregs[REG(0)],
+                        ((ctx->pc & 0xfffffffc) + 4 + B7_0 * 4) & ~3);
        return;
     case 0xcb00:               /* or #imm,R0 */
-       gen_op_or_imm_rN(B7_0, REG(0));
+       tcg_gen_ori_i32(cpu_gregs[REG(0)], cpu_gregs[REG(0)], B7_0);
        return;
     case 0xcf00:               /* or.b #imm,@(R0,GBR) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_addl_GBR_T0();
-       gen_op_movl_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_gbr);
+       tcg_gen_mov_i32(cpu_T[0], cpu_T[1]);
        gen_op_ldub_T0_T0(ctx);
-       gen_op_or_imm_T0(B7_0);
+       tcg_gen_ori_i32(cpu_T[0], cpu_T[0], B7_0);
        gen_op_stb_T0_T1(ctx);
        return;
     case 0xc300:               /* trapa #imm */
-       CHECK_NOT_DELAY_SLOT gen_op_movl_imm_PC(ctx->pc);
-       gen_op_trapa(B7_0);
+       CHECK_NOT_DELAY_SLOT
+       tcg_gen_movi_i32(cpu_pc, ctx->pc);
+       tcg_gen_movi_i32(cpu_T[0], B7_0);
+       tcg_gen_helper_0_1(helper_trapa, cpu_T[0]);
        ctx->bstate = BS_BRANCH;
        return;
     case 0xc800:               /* tst #imm,R0 */
-       gen_op_tst_imm_rN(B7_0, REG(0));
+       tcg_gen_andi_i32(cpu_T[0], cpu_gregs[REG(0)], B7_0);
+       gen_cmp_imm(TCG_COND_EQ, cpu_T[0], 0);
        return;
     case 0xcc00:               /* tst.b #imm,@(R0,GBR) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_addl_GBR_T0();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_gbr);
        gen_op_ldub_T0_T0(ctx);
-       gen_op_tst_imm_T0(B7_0);
+       tcg_gen_andi_i32(cpu_T[0], cpu_T[0], B7_0);
+       gen_cmp_imm(TCG_COND_EQ, cpu_T[0], 0);
        return;
     case 0xca00:               /* xor #imm,R0 */
-       gen_op_xor_imm_rN(B7_0, REG(0));
+       tcg_gen_xori_i32(cpu_gregs[REG(0)], cpu_gregs[REG(0)], B7_0);
        return;
     case 0xce00:               /* xor.b #imm,@(R0,GBR) */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_addl_GBR_T0();
-       gen_op_movl_T0_T1();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_gbr);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
        gen_op_ldub_T0_T0(ctx);
-       gen_op_xor_imm_T0(B7_0);
+       tcg_gen_xori_i32(cpu_T[0], cpu_T[0], B7_0);
        gen_op_stb_T0_T1(ctx);
        return;
     }
 
     switch (ctx->opcode & 0xf08f) {
     case 0x408e:               /* ldc Rm,Rn_BANK */
-       gen_op_movl_rN_rN(REG(B11_8), ALTREG(B6_4));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_mov_i32(cpu_gregs[ALTREG(B6_4)], cpu_T[0]);
        return;
     case 0x4087:               /* ldc.l @Rm+,Rn_BANK */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
-       gen_op_movl_T0_rN(ALTREG(B6_4));
-       gen_op_inc4_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_gregs[ALTREG(B6_4)], cpu_T[0]);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
        return;
     case 0x0082:               /* stc Rm_BANK,Rn */
-       gen_op_movl_rN_rN(ALTREG(B6_4), REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[ALTREG(B6_4)]);
+       tcg_gen_mov_i32(cpu_gregs[REG(B11_8)], cpu_T[0]);
        return;
     case 0x4083:               /* stc.l Rm_BANK,@-Rn */
-       gen_op_dec4_rN(REG(B11_8));
-       gen_op_movl_rN_T1(REG(B11_8));
-       gen_op_movl_rN_T0(ALTREG(B6_4));
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[ALTREG(B6_4)]);
+       tcg_gen_addi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
        gen_op_stl_T0_T1(ctx);
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 4);
        return;
     }
 
     switch (ctx->opcode & 0xf0ff) {
     case 0x0023:               /* braf Rn */
-       CHECK_NOT_DELAY_SLOT gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_braf_T0(ctx->pc + 4);
+       CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_addi_i32(cpu_delayed_pc, cpu_T[0], ctx->pc + 4);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
     case 0x0003:               /* bsrf Rn */
-       CHECK_NOT_DELAY_SLOT gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_bsrf_T0(ctx->pc + 4);
+       CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+       tcg_gen_add_i32(cpu_delayed_pc, cpu_T[0], cpu_pr);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
     case 0x4015:               /* cmp/pl Rn */
-       gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_cmp_pl_T0();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       gen_cmp_imm(TCG_COND_GT, cpu_T[0], 0);
        return;
     case 0x4011:               /* cmp/pz Rn */
-       gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_cmp_pz_T0();
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       gen_cmp_imm(TCG_COND_GE, cpu_T[0], 0);
        return;
     case 0x4010:               /* dt Rn */
-       gen_op_dt_rN(REG(B11_8));
+       tcg_gen_subi_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 1);
+       gen_cmp_imm(TCG_COND_EQ, cpu_gregs[REG(B11_8)], 0);
        return;
     case 0x402b:               /* jmp @Rn */
-       CHECK_NOT_DELAY_SLOT gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_jmp_T0();
+       CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_mov_i32(cpu_delayed_pc, cpu_T[0]);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
     case 0x400b:               /* jsr @Rn */
-       CHECK_NOT_DELAY_SLOT gen_op_movl_rN_T0(REG(B11_8));
-       gen_op_jsr_T0(ctx->pc + 4);
+       CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+       tcg_gen_mov_i32(cpu_delayed_pc, cpu_T[0]);
        ctx->flags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
 #define LDST(reg,ldnum,ldpnum,ldop,stnum,stpnum,stop,extrald)  \
   case ldnum:                                                  \
-    gen_op_movl_rN_T0 (REG(B11_8));                            \
+    tcg_gen_mov_i32 (cpu_T[0], cpu_gregs[REG(B11_8)]);         \
     gen_op_##ldop##_T0_##reg ();                               \
     extrald                                                    \
     return;                                                    \
   case ldpnum:                                                 \
-    gen_op_movl_rN_T0 (REG(B11_8));                            \
+    tcg_gen_mov_i32 (cpu_T[0], cpu_gregs[REG(B11_8)]);         \
     gen_op_ldl_T0_T0 (ctx);                                    \
-    gen_op_inc4_rN (REG(B11_8));                               \
+    tcg_gen_addi_i32(cpu_gregs[REG(B11_8)],                    \
+                     cpu_gregs[REG(B11_8)], 4);                        \
     gen_op_##ldop##_T0_##reg ();                               \
     extrald                                                    \
     return;                                                    \
   case stnum:                                                  \
-    gen_op_##stop##_##reg##_T0 ();                                     \
-    gen_op_movl_T0_rN (REG(B11_8));                            \
+    gen_op_##stop##_##reg##_T0 ();                             \
+    tcg_gen_mov_i32 (cpu_gregs[REG(B11_8)], cpu_T[0]);         \
     return;                                                    \
   case stpnum:                                                 \
     gen_op_##stop##_##reg##_T0 ();                             \
-    gen_op_dec4_rN (REG(B11_8));                               \
-    gen_op_movl_rN_T1 (REG(B11_8));                            \
+    tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],                    \
+                     cpu_gregs[REG(B11_8)], 4);                        \
+    tcg_gen_mov_i32 (cpu_T[1], cpu_gregs[REG(B11_8)]);         \
+    tcg_gen_addi_i32(cpu_gregs[REG(B11_8)],                    \
+                     cpu_gregs[REG(B11_8)], 4);                        \
     gen_op_stl_T0_T1 (ctx);                                    \
+    tcg_gen_subi_i32(cpu_gregs[REG(B11_8)],                    \
+                     cpu_gregs[REG(B11_8)], 4);                        \
     return;
        LDST(sr, 0x400e, 0x4007, ldc, 0x0002, 0x4003, stc, ctx->bstate =
             BS_STOP;)
@@ -997,23 +1197,23 @@ void _decode_opc(DisasContext * ctx)
        LDST(fpscr, 0x406a, 0x4066, lds, 0x006a, 0x4062, sts, ctx->bstate =
             BS_STOP;)
     case 0x00c3:               /* movca.l R0,@Rm */
-       gen_op_movl_rN_T0(REG(0));
-       gen_op_movl_rN_T1(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(0)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_gregs[REG(B11_8)]);
        gen_op_stl_T0_T1(ctx);
        return;
     case 0x0029:               /* movt Rn */
-       gen_op_movt_rN(REG(B11_8));
+       tcg_gen_andi_i32(cpu_gregs[REG(B11_8)], cpu_sr, SR_T);
        return;
     case 0x0093:               /* ocbi @Rn */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
        return;
     case 0x00a3:               /* ocbp @Rn */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
        return;
     case 0x00b3:               /* ocbwb @Rn */
-       gen_op_movl_rN_T0(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
        gen_op_ldl_T0_T0(ctx);
        return;
     case 0x0083:               /* pref @Rn */
@@ -1041,25 +1241,30 @@ void _decode_opc(DisasContext * ctx)
        gen_op_shlr_Rn(REG(B11_8));
        return;
     case 0x4008:               /* shll2 Rn */
-       gen_op_shll2_Rn(REG(B11_8));
+       tcg_gen_shli_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 2);
        return;
     case 0x4018:               /* shll8 Rn */
-       gen_op_shll8_Rn(REG(B11_8));
+       tcg_gen_shli_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 8);
        return;
     case 0x4028:               /* shll16 Rn */
-       gen_op_shll16_Rn(REG(B11_8));
+       tcg_gen_shli_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 16);
        return;
     case 0x4009:               /* shlr2 Rn */
-       gen_op_shlr2_Rn(REG(B11_8));
+       tcg_gen_shri_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 2);
        return;
     case 0x4019:               /* shlr8 Rn */
-       gen_op_shlr8_Rn(REG(B11_8));
+       tcg_gen_shri_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 8);
        return;
     case 0x4029:               /* shlr16 Rn */
-       gen_op_shlr16_Rn(REG(B11_8));
+       tcg_gen_shri_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], 16);
        return;
     case 0x401b:               /* tas.b @Rn */
-       gen_op_tasb_rN(REG(B11_8));
+       tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
+       tcg_gen_mov_i32(cpu_T[1], cpu_T[0]);
+       gen_op_ldub_T0_T0(ctx);
+       gen_cmp_imm(TCG_COND_EQ, cpu_T[0], 0);
+       tcg_gen_ori_i32(cpu_T[0], cpu_T[0], 0x80);
+       gen_op_stb_T0_T1(ctx);
        return;
     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
        gen_op_movl_fpul_FT0();
@@ -1126,14 +1331,14 @@ void _decode_opc(DisasContext * ctx)
        break;
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
        if (!(ctx->fpscr & FPSCR_PR)) {
-           gen_op_movl_imm_T0(0);
+           tcg_gen_movi_i32(cpu_T[0], 0);
            gen_op_fmov_T0_frN(FREG(B11_8));
            return;
        }
        break;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
        if (!(ctx->fpscr & FPSCR_PR)) {
-           gen_op_movl_imm_T0(0x3f800000);
+           tcg_gen_movi_i32(cpu_T[0], 0x3f800000);
            gen_op_fmov_T0_frN(FREG(B11_8));
            return;
        }
@@ -1152,7 +1357,7 @@ void _decode_opc(DisasContext * ctx)
 
     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
            ctx->opcode, ctx->pc);
-    gen_op_raise_illegal_instruction();
+    tcg_gen_helper_0_0(helper_raise_illegal_instruction);
     ctx->bstate = BS_EXCP;
 }
 
@@ -1164,7 +1369,12 @@ void decode_opc(DisasContext * ctx)
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         if (ctx->flags & DELAY_SLOT_CLEARME) {
-            gen_op_store_flags(0);
+            gen_store_flags(0);
+        } else {
+           /* go out of the delay slot */
+           uint32_t new_flags = ctx->flags;
+           new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+           gen_store_flags(new_flags);
         }
         ctx->flags = 0;
         ctx->bstate = BS_BRANCH;
@@ -1175,9 +1385,13 @@ void decode_opc(DisasContext * ctx)
        }
 
     }
+
+    /* go into a delay slot */
+    if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
+        gen_store_flags(ctx->flags);
 }
 
-static inline int
+static inline void
 gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
                                int search_pc)
 {
@@ -1185,6 +1399,8 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
     target_ulong pc_start;
     static uint16_t *gen_opc_end;
     int i, ii;
+    int num_insns;
+    int max_insns;
 
     pc_start = tb->pc;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
@@ -1209,13 +1425,18 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
 #endif
 
     ii = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+    gen_icount_start();
     while (ctx.bstate == BS_NONE && gen_opc_ptr < gen_opc_end) {
        if (env->nb_breakpoints > 0) {
            for (i = 0; i < env->nb_breakpoints; i++) {
                if (ctx.pc == env->breakpoints[i]) {
                    /* We have hit a breakpoint - make sure PC is up-to-date */
-                   gen_op_movl_imm_PC(ctx.pc);
-                   gen_op_debug();
+                   tcg_gen_movi_i32(cpu_pc, ctx.pc);
+                   tcg_gen_helper_0_0(helper_debug);
                    ctx.bstate = BS_EXCP;
                    break;
                }
@@ -1231,24 +1452,32 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
             gen_opc_pc[ii] = ctx.pc;
             gen_opc_hflags[ii] = ctx.flags;
             gen_opc_instr_start[ii] = 1;
+            gen_opc_icount[ii] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
 #if 0
        fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
        fflush(stderr);
 #endif
        ctx.opcode = lduw_code(ctx.pc);
        decode_opc(&ctx);
+        num_insns++;
        ctx.pc += 2;
        if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
            break;
        if (env->singlestep_enabled)
            break;
+        if (num_insns >= max_insns)
+            break;
 #ifdef SH4_SINGLE_STEP
        break;
 #endif
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (env->singlestep_enabled) {
-        gen_op_debug();
+        tcg_gen_helper_0_0(helper_debug);
     } else {
        switch (ctx.bstate) {
         case BS_STOP:
@@ -1256,7 +1485,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
             /* fall through */
         case BS_NONE:
             if (ctx.flags) {
-                gen_op_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
+                gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
            }
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;
@@ -1270,6 +1499,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
        }
     }
 
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         i = gen_opc_ptr - gen_opc_buf;
@@ -1278,6 +1508,7 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
             gen_opc_instr_start[ii++] = 0;
     } else {
         tb->size = ctx.pc - pc_start;
+        tb->icount = num_insns;
     }
 
 #ifdef DEBUG_DISAS
@@ -1291,17 +1522,16 @@ gen_intermediate_code_internal(CPUState * env, TranslationBlock * tb,
        fprintf(logfile, "\n");
     }
 #endif
-    return 0;
 }
 
-int gen_intermediate_code(CPUState * env, struct TranslationBlock *tb)
+void gen_intermediate_code(CPUState * env, struct TranslationBlock *tb)
 {
-    return gen_intermediate_code_internal(env, tb, 0);
+    gen_intermediate_code_internal(env, tb, 0);
 }
 
-int gen_intermediate_code_pc(CPUState * env, struct TranslationBlock *tb)
+void gen_intermediate_code_pc(CPUState * env, struct TranslationBlock *tb)
 {
-    return gen_intermediate_code_internal(env, tb, 1);
+    gen_intermediate_code_internal(env, tb, 1);
 }
 
 void gen_pc_load(CPUState *env, TranslationBlock *tb,