git.proxmox.com Git - qemu.git/blobdiff - target-i386/translate.c
Add instruction counter.
[qemu.git] / target-i386 / translate.c
index 844cdb2c19b01b2606fee439970921cca8ccd566..ea0933ef5ce8806d714be4c19bbd62eb3c8c6409 100644 (file)
 //#define MACRO_TEST   1
 
 /* global register indexes */
-static TCGv cpu_env, cpu_T[2], cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst;
-static TCGv cpu_T3;
+static TCGv cpu_env, cpu_A0, cpu_cc_op, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+/* local temps */
+static TCGv cpu_T[2], cpu_T3;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1;
 static TCGv cpu_tmp5, cpu_tmp6;
 
+#include "gen-icount.h"
+
 #ifdef TARGET_X86_64
 static int x86_64_hregs;
 #endif
@@ -260,34 +263,34 @@ static inline void gen_op_andl_A0_ffff(void)
 #define REG_LH_OFFSET 4
 #endif
 
-static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg)
+static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
     switch(ot) {
     case OT_BYTE:
         if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_B_OFFSET);
         } else {
-            tcg_gen_st8_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
         }
         break;
     case OT_WORD:
-        tcg_gen_st16_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
+        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_W_OFFSET);
         break;
 #ifdef TARGET_X86_64
     case OT_LONG:
-        tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         /* high part of register set to zero */
         tcg_gen_movi_tl(cpu_tmp0, 0);
         tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
         break;
     default:
     case OT_QUAD:
-        tcg_gen_st_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
         break;
 #else
     default:
     case OT_LONG:
-        tcg_gen_st32_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
         break;
 #endif
     }
@@ -295,12 +298,12 @@ static inline void gen_op_mov_reg_TN(int ot, int t_index, int reg)
 
 static inline void gen_op_mov_reg_T0(int ot, int reg)
 {
-    gen_op_mov_reg_TN(ot, 0, reg);
+    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
 }
 
 static inline void gen_op_mov_reg_T1(int ot, int reg)
 {
-    gen_op_mov_reg_TN(ot, 1, reg);
+    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
 }
 
 static inline void gen_op_mov_reg_A0(int size, int reg)
@@ -329,23 +332,28 @@ static inline void gen_op_mov_reg_A0(int size, int reg)
     }
 }
 
-static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
 {
     switch(ot) {
     case OT_BYTE:
         if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
             goto std_case;
         } else {
-            tcg_gen_ld8u_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
+            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + REG_H_OFFSET);
         }
         break;
     default:
     std_case:
-        tcg_gen_ld_tl(cpu_T[t_index], cpu_env, offsetof(CPUState, regs[reg]));
+        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
         break;
     }
 }
 
+static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
+{
+    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
+}
+
 static inline void gen_op_movl_A0_reg(int reg)
 {
     tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
@@ -511,90 +519,70 @@ static inline void gen_op_lds_T0_A0(int idx)
     }
 }
 
-/* sign does not matter, except for lidt/lgdt call (TODO: fix it) */
-static inline void gen_op_ld_T0_A0(int idx)
+static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
     case 0:
-        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0, mem_index);
+        tcg_gen_qemu_ld8u(t0, a0, mem_index);
         break;
     case 1:
-        tcg_gen_qemu_ld16u(cpu_T[0], cpu_A0, mem_index);
+        tcg_gen_qemu_ld16u(t0, a0, mem_index);
         break;
     case 2:
-        tcg_gen_qemu_ld32u(cpu_T[0], cpu_A0, mem_index);
+        tcg_gen_qemu_ld32u(t0, a0, mem_index);
         break;
     default:
     case 3:
-        tcg_gen_qemu_ld64(cpu_T[0], cpu_A0, mem_index);
+        tcg_gen_qemu_ld64(t0, a0, mem_index);
         break;
     }
 }
 
+/* XXX: always use ldu or lds */
+static inline void gen_op_ld_T0_A0(int idx)
+{
+    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
+}
+
 static inline void gen_op_ldu_T0_A0(int idx)
 {
-    gen_op_ld_T0_A0(idx);
+    gen_op_ld_v(idx, cpu_T[0], cpu_A0);
 }
 
 static inline void gen_op_ld_T1_A0(int idx)
+{
+    gen_op_ld_v(idx, cpu_T[1], cpu_A0);
+}
+
+static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
 {
     int mem_index = (idx >> 2) - 1;
     switch(idx & 3) {
     case 0:
-        tcg_gen_qemu_ld8u(cpu_T[1], cpu_A0, mem_index);
+        tcg_gen_qemu_st8(t0, a0, mem_index);
         break;
     case 1:
-        tcg_gen_qemu_ld16u(cpu_T[1], cpu_A0, mem_index);
+        tcg_gen_qemu_st16(t0, a0, mem_index);
         break;
     case 2:
-        tcg_gen_qemu_ld32u(cpu_T[1], cpu_A0, mem_index);
+        tcg_gen_qemu_st32(t0, a0, mem_index);
         break;
     default:
     case 3:
-        tcg_gen_qemu_ld64(cpu_T[1], cpu_A0, mem_index);
+        tcg_gen_qemu_st64(t0, a0, mem_index);
         break;
     }
 }
 
 static inline void gen_op_st_T0_A0(int idx)
 {
-    int mem_index = (idx >> 2) - 1;
-    switch(idx & 3) {
-    case 0:
-        tcg_gen_qemu_st8(cpu_T[0], cpu_A0, mem_index);
-        break;
-    case 1:
-        tcg_gen_qemu_st16(cpu_T[0], cpu_A0, mem_index);
-        break;
-    case 2:
-        tcg_gen_qemu_st32(cpu_T[0], cpu_A0, mem_index);
-        break;
-    default:
-    case 3:
-        tcg_gen_qemu_st64(cpu_T[0], cpu_A0, mem_index);
-        break;
-    }
+    gen_op_st_v(idx, cpu_T[0], cpu_A0);
 }
 
 static inline void gen_op_st_T1_A0(int idx)
 {
-    int mem_index = (idx >> 2) - 1;
-    switch(idx & 3) {
-    case 0:
-        tcg_gen_qemu_st8(cpu_T[1], cpu_A0, mem_index);
-        break;
-    case 1:
-        tcg_gen_qemu_st16(cpu_T[1], cpu_A0, mem_index);
-        break;
-    case 2:
-        tcg_gen_qemu_st32(cpu_T[1], cpu_A0, mem_index);
-        break;
-    default:
-    case 3:
-        tcg_gen_qemu_st64(cpu_T[1], cpu_A0, mem_index);
-        break;
-    }
+    gen_op_st_v(idx, cpu_T[1], cpu_A0);
 }
 
 static inline void gen_jmp_im(target_ulong pc)
@@ -703,14 +691,14 @@ static inline void gen_op_jnz_ecx(int size, int label1)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
     gen_extu(size + 1, cpu_tmp0);
-    tcg_gen_brcond_tl(TCG_COND_NE, cpu_tmp0, tcg_const_tl(0), label1);
+    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
 }
 
 static inline void gen_op_jz_ecx(int size, int label1)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
     gen_extu(size + 1, cpu_tmp0);
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_tmp0, tcg_const_tl(0), label1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
 }
 
 static void *helper_in_func[3] = {
@@ -747,7 +735,7 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
         tcg_gen_helper_0_1(gen_check_io_func[ot],
                            cpu_tmp2_i32);
     }
-    if(s->flags & (1ULL << INTERCEPT_IOIO_PROT)) {
+    if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
@@ -857,9 +845,11 @@ static void gen_compute_eflags(TCGv reg)
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
-static inline void gen_setcc_slow_T0(int op)
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
 {
-    switch(op) {
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    switch(jcc_op) {
     case JCC_O:
         gen_compute_eflags(cpu_T[0]);
         tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
@@ -1000,32 +990,31 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
                 t0 = cpu_cc_dst;
                 break;
             }
-            tcg_gen_brcond_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 
-                              tcg_const_tl(0), l1);
+            tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
             break;
         case JCC_S:
         fast_jcc_s:
             switch(size) {
             case 0:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
-                tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                  tcg_const_tl(0), l1);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
                 break;
             case 1:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
-                tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                  tcg_const_tl(0), l1);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
                 break;
 #ifdef TARGET_X86_64
             case 2:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
-                tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
-                                  tcg_const_tl(0), l1);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                                   0, l1);
                 break;
 #endif
             default:
-                tcg_gen_brcond_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
-                                  tcg_const_tl(0), l1);
+                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
+                                   0, l1);
                 break;
             }
             break;
@@ -1152,9 +1141,9 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
         break;
     default:
     slow_jcc:
-        gen_setcc_slow_T0(jcc_op);
-        tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
-                          cpu_T[0], tcg_const_tl(0), l1);
+        gen_setcc_slow_T0(s, jcc_op);
+        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
+                           cpu_T[0], 0, l1);
         break;
     }
 }
@@ -1216,6 +1205,8 @@ static inline void gen_cmps(DisasContext *s, int ot)
 
 static inline void gen_ins(DisasContext *s, int ot)
 {
+    if (use_icount)
+        gen_io_start();
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
@@ -1228,10 +1219,14 @@ static inline void gen_ins(DisasContext *s, int ot)
     gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    if (use_icount)
+        gen_io_end();
 }
 
 static inline void gen_outs(DisasContext *s, int ot)
 {
+    if (use_icount)
+        gen_io_start();
     gen_string_movl_A0_ESI(s);
     gen_op_ld_T0_A0(ot + s->mem_index);
 
@@ -1243,6 +1238,8 @@ static inline void gen_outs(DisasContext *s, int ot)
 
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
+    if (use_icount)
+        gen_io_end();
 }
 
 /* same method as Valgrind : we generate jumps to current or next
@@ -1437,7 +1434,8 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
 {
     target_ulong mask;
     int shift_label;
-    
+    TCGv t0, t1;
+
     if (ot == OT_QUAD)
         mask = 0x3f;
     else
@@ -1478,11 +1476,18 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
 
+    /* XXX: inefficient */
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+    t1 = tcg_temp_local_new(TCG_TYPE_TL);
+
+    tcg_gen_mov_tl(t0, cpu_T[0]);
+    tcg_gen_mov_tl(t1, cpu_T3);
+
     shift_label = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T[1], tcg_const_tl(0), shift_label);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);
 
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T3);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_src, t1);
+    tcg_gen_mov_tl(cpu_cc_dst, t0);
     if (is_right)
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
     else
@@ -1490,6 +1495,9 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         
     gen_set_label(shift_label);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
 }
 
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
@@ -1513,15 +1521,15 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
         if (is_right) {
             if (is_arith) {
                 gen_exts(ot, cpu_T[0]);
-                tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], op2 - 1);
+                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                 tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
             } else {
                 gen_extu(ot, cpu_T[0]);
-                tcg_gen_shri_tl(cpu_tmp0, cpu_T[0], op2 - 1);
+                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], op2 - 1);
+            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
             tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
         }
     }
@@ -1534,7 +1542,7 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
         
     /* update eflags if non zero shift */
     if (op2 != 0) {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
         tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
         if (is_right)
             s->cc_op = CC_OP_SARB + ot;
@@ -1557,78 +1565,95 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
 {
     target_ulong mask;
     int label1, label2, data_bits;
-    
+    TCGv t0, t1, t2, a0;
+
+    /* XXX: inefficient, but we must use local temps */
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+    t1 = tcg_temp_local_new(TCG_TYPE_TL);
+    t2 = tcg_temp_local_new(TCG_TYPE_TL);
+    a0 = tcg_temp_local_new(TCG_TYPE_TL);
+
     if (ot == OT_QUAD)
         mask = 0x3f;
     else
         mask = 0x1f;
 
     /* load */
-    if (op1 == OR_TMP0)
-        gen_op_ld_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_TN_reg(ot, 0, op1);
+    if (op1 == OR_TMP0) {
+        tcg_gen_mov_tl(a0, cpu_A0);
+        gen_op_ld_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_v_reg(ot, t0, op1);
+    }
 
-    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+    tcg_gen_mov_tl(t1, cpu_T[1]);
+
+    tcg_gen_andi_tl(t1, t1, mask);
 
     /* Must test zero case to avoid using undefined behaviour in TCG
        shifts. */
     label1 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T[1], tcg_const_tl(0), label1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
     
     if (ot <= OT_WORD)
-        tcg_gen_andi_tl(cpu_tmp0, cpu_T[1], (1 << (3 + ot)) - 1);
+        tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
     else
-        tcg_gen_mov_tl(cpu_tmp0, cpu_T[1]);
+        tcg_gen_mov_tl(cpu_tmp0, t1);
     
-    gen_extu(ot, cpu_T[0]);
-    tcg_gen_mov_tl(cpu_T3, cpu_T[0]);
+    gen_extu(ot, t0);
+    tcg_gen_mov_tl(t2, t0);
 
     data_bits = 8 << ot;
     /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
        fix TCG definition) */
     if (is_right) {
-        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp0);
+        tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
         tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
-        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        tcg_gen_shl_tl(t0, t0, cpu_tmp0);
     } else {
-        tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp0);
+        tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
         tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(data_bits), cpu_tmp0);
-        tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        tcg_gen_shr_tl(t0, t0, cpu_tmp0);
     }
-    tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+    tcg_gen_or_tl(t0, t0, cpu_tmp4);
 
     gen_set_label(label1);
     /* store */
-    if (op1 == OR_TMP0)
-        gen_op_st_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_reg_T0(ot, op1);
+    if (op1 == OR_TMP0) {
+        gen_op_st_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_reg_v(ot, op1, t0);
+    }
     
     /* update eflags */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
 
     label2 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T[1], tcg_const_tl(0), label2);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
 
     gen_compute_eflags(cpu_cc_src);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
-    tcg_gen_xor_tl(cpu_tmp0, cpu_T3, cpu_T[0]);
+    tcg_gen_xor_tl(cpu_tmp0, t2, t0);
     tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
     tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
     if (is_right) {
-        tcg_gen_shri_tl(cpu_T[0], cpu_T[0], data_bits - 1);
+        tcg_gen_shri_tl(t0, t0, data_bits - 1);
     }
-    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_C);
-    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
+    tcg_gen_andi_tl(t0, t0, CC_C);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
     
     tcg_gen_discard_tl(cpu_cc_dst);
     tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
         
     gen_set_label(label2);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(a0);
 }
 
 static void *helper_rotc[8] = {
@@ -1667,9 +1692,9 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 
     /* update eflags */
     label1 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T3, tcg_const_tl(-1), label1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
 
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T3);
+    tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
     tcg_gen_discard_tl(cpu_cc_dst);
     tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
         
@@ -1683,6 +1708,12 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
 {
     int label1, label2, data_bits;
     target_ulong mask;
+    TCGv t0, t1, t2, a0;
+
+    t0 = tcg_temp_local_new(TCG_TYPE_TL);
+    t1 = tcg_temp_local_new(TCG_TYPE_TL);
+    t2 = tcg_temp_local_new(TCG_TYPE_TL);
+    a0 = tcg_temp_local_new(TCG_TYPE_TL);
 
     if (ot == OT_QUAD)
         mask = 0x3f;
@@ -1690,95 +1721,102 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
         mask = 0x1f;
 
     /* load */
-    if (op1 == OR_TMP0)
-        gen_op_ld_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_TN_reg(ot, 0, op1);
+    if (op1 == OR_TMP0) {
+        tcg_gen_mov_tl(a0, cpu_A0);
+        gen_op_ld_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_v_reg(ot, t0, op1);
+    }
 
     tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
+
+    tcg_gen_mov_tl(t1, cpu_T[1]);
+    tcg_gen_mov_tl(t2, cpu_T3);
+
     /* Must test zero case to avoid using undefined behaviour in TCG
        shifts. */
     label1 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T3, tcg_const_tl(0), label1);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
     
-    tcg_gen_addi_tl(cpu_tmp5, cpu_T3, -1);
+    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
     if (ot == OT_WORD) {
         /* Note: we implement the Intel behaviour for shift count > 16 */
         if (is_right) {
-            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T[1], 16);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
-            tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_andi_tl(t0, t0, 0xffff);
+            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
+            tcg_gen_or_tl(t0, t0, cpu_tmp0);
+            tcg_gen_ext32u_tl(t0, t0);
 
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
+            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
             
             /* only needed if count > 16, but a test would complicate */
-            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3);
-            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp5);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
+            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
 
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3);
+            tcg_gen_shr_tl(t0, t0, t2);
 
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_or_tl(t0, t0, cpu_tmp0);
         } else {
             /* XXX: not optimal */
-            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-            tcg_gen_shli_tl(cpu_T[1], cpu_T[1], 16);
-            tcg_gen_or_tl(cpu_T[1], cpu_T[1], cpu_T[0]);
-            tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+            tcg_gen_andi_tl(t0, t0, 0xffff);
+            tcg_gen_shli_tl(t1, t1, 16);
+            tcg_gen_or_tl(t1, t1, t0);
+            tcg_gen_ext32u_tl(t1, t1);
             
-            tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
+            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
             tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
-            tcg_gen_shr_tl(cpu_tmp6, cpu_T[1], cpu_tmp0);
+            tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
             tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp6);
 
-            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3);
-            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), cpu_T3);
-            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_shl_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
+            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
         }
     } else {
         data_bits = 8 << ot;
         if (is_right) {
             if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32u_tl(t0, t0);
 
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
+            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
 
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T3);
-            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3);
-            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_shr_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
+            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
             
         } else {
             if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_ext32u_tl(t1, t1);
 
-            tcg_gen_shl_tl(cpu_tmp4, cpu_T[0], cpu_tmp5);
+            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
             
-            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T3);
-            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), cpu_T3);
-            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp5);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_shl_tl(t0, t0, t2);
+            tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
+            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
+            tcg_gen_or_tl(t0, t0, t1);
         }
     }
-    tcg_gen_mov_tl(cpu_T[1], cpu_tmp4);
+    tcg_gen_mov_tl(t1, cpu_tmp4);
 
     gen_set_label(label1);
     /* store */
-    if (op1 == OR_TMP0)
-        gen_op_st_T0_A0(ot + s->mem_index);
-    else
-        gen_op_mov_reg_T0(ot, op1);
+    if (op1 == OR_TMP0) {
+        gen_op_st_v(ot + s->mem_index, t0, a0);
+    } else {
+        gen_op_mov_reg_v(ot, op1, t0);
+    }
     
     /* update eflags */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
 
     label2 = gen_new_label();
-    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T3, tcg_const_tl(0), label2);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
 
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_src, t1);
+    tcg_gen_mov_tl(cpu_cc_dst, t0);
     if (is_right) {
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
     } else {
@@ -1786,6 +1824,11 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
     }
     gen_set_label(label2);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    tcg_temp_free(a0);
 }
 
 static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
@@ -2218,23 +2261,26 @@ static inline void gen_jcc(DisasContext *s, int b,
 static void gen_setcc(DisasContext *s, int b)
 {
     int inv, jcc_op, l1;
+    TCGv t0;
 
     if (is_fast_jcc_case(s, b)) {
         /* nominal case: we use a jump */
-        tcg_gen_movi_tl(cpu_T[0], 0);
+        /* XXX: make it faster by adding new instructions in TCG */
+        t0 = tcg_temp_local_new(TCG_TYPE_TL);
+        tcg_gen_movi_tl(t0, 0);
         l1 = gen_new_label();
         gen_jcc1(s, s->cc_op, b ^ 1, l1);
-        tcg_gen_movi_tl(cpu_T[0], 1);
+        tcg_gen_movi_tl(t0, 1);
         gen_set_label(l1);
+        tcg_gen_mov_tl(cpu_T[0], t0);
+        tcg_temp_free(t0);
     } else {
         /* slow case: it is more efficient not to generate a jump,
            although it is questionnable whether this optimization is
            worth to */
         inv = b & 1;
         jcc_op = (b >> 1) & 7;
-        if (s->cc_op != CC_OP_DYNAMIC)
-            gen_op_set_cc_op(s->cc_op);
-        gen_setcc_slow_T0(jcc_op);
+        gen_setcc_slow_T0(s, jcc_op);
         if (inv) {
             tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
         }
@@ -2286,59 +2332,24 @@ static inline int svm_is_rep(int prefixes)
     return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
 }
 
-static inline int
+static inline void
 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
                               uint32_t type, uint64_t param)
 {
-    if(!(s->flags & (INTERCEPT_SVM_MASK)))
-       /* no SVM activated */
-        return 0;
-    switch(type) {
-        /* CRx and DRx reads/writes */
-        case SVM_EXIT_READ_CR0 ... SVM_EXIT_EXCP_BASE - 1:
-            if (s->cc_op != CC_OP_DYNAMIC) {
-                gen_op_set_cc_op(s->cc_op);
-            }
-            gen_jmp_im(pc_start - s->cs_base);
-            tcg_gen_helper_0_2(helper_svm_check_intercept_param, 
-                               tcg_const_i32(type), tcg_const_i64(param));
-            /* this is a special case as we do not know if the interception occurs
-               so we assume there was none */
-            return 0;
-        case SVM_EXIT_MSR:
-            if(s->flags & (1ULL << INTERCEPT_MSR_PROT)) {
-                if (s->cc_op != CC_OP_DYNAMIC) {
-                    gen_op_set_cc_op(s->cc_op);
-                }
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_helper_0_2(helper_svm_check_intercept_param,
-                                   tcg_const_i32(type), tcg_const_i64(param));
-                /* this is a special case as we do not know if the interception occurs
-                   so we assume there was none */
-                return 0;
-            }
-            break;
-        default:
-            if(s->flags & (1ULL << ((type - SVM_EXIT_INTR) + INTERCEPT_INTR))) {
-                if (s->cc_op != CC_OP_DYNAMIC) {
-                    gen_op_set_cc_op(s->cc_op);
-                }
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_helper_0_2(helper_vmexit,
-                                   tcg_const_i32(type), tcg_const_i64(param));
-                /* we can optimize this one so TBs don't get longer
-                   than up to vmexit */
-                gen_eob(s);
-                return 1;
-            }
-    }
-    return 0;
+    /* no SVM activated; fast case */
+    if (likely(!(s->flags & HF_SVMI_MASK)))
+        return;
+    if (s->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s->cc_op);
+    gen_jmp_im(pc_start - s->cs_base);
+    tcg_gen_helper_0_2(helper_svm_check_intercept_param, 
+                       tcg_const_i32(type), tcg_const_i64(param));
 }
 
-static inline int
+static inline void
 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
 {
-    return gen_svm_check_intercept_param(s, pc_start, type, 0);
+    gen_svm_check_intercept_param(s, pc_start, type, 0);
 }
 
 static inline void gen_stack_update(DisasContext *s, int addend)
@@ -3586,6 +3597,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
+    if (unlikely(loglevel & CPU_LOG_TB_OP))
+        tcg_gen_debug_insn_start(pc_start);
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
@@ -3785,9 +3798,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
 
+    case 0x82:
+        if (CODE64(s))
+            goto illegal_op;
     case 0x80: /* GRP1 */
     case 0x81:
-    case 0x82:
     case 0x83:
         {
             int val;
@@ -4352,6 +4367,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x1b1: /* cmpxchg Ev, Gv */
         {
             int label1, label2;
+            TCGv t0, t1, t2, a0;
 
             if ((b & 1) == 0)
                 ot = OT_BYTE;
@@ -4360,37 +4376,46 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
-            gen_op_mov_TN_reg(ot, 1, reg);
+            t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            t1 = tcg_temp_local_new(TCG_TYPE_TL);
+            t2 = tcg_temp_local_new(TCG_TYPE_TL);
+            a0 = tcg_temp_local_new(TCG_TYPE_TL);
+            gen_op_mov_v_reg(ot, t1, reg);
             if (mod == 3) {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_mov_TN_reg(ot, 0, rm);
+                gen_op_mov_v_reg(ot, t0, rm);
             } else {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T0_A0(ot + s->mem_index);
+                tcg_gen_mov_tl(a0, cpu_A0);
+                gen_op_ld_v(ot + s->mem_index, t0, a0);
                 rm = 0; /* avoid warning */
             }
             label1 = gen_new_label();
-            tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_EAX]));
-            tcg_gen_sub_tl(cpu_T3, cpu_T3, cpu_T[0]);
-            gen_extu(ot, cpu_T3);
-            tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T3, tcg_const_tl(0), label1);
+            tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
+            tcg_gen_sub_tl(t2, t2, t0);
+            gen_extu(ot, t2);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
             if (mod == 3) {
                 label2 = gen_new_label();
-                gen_op_mov_reg_T0(ot, R_EAX);
+                gen_op_mov_reg_v(ot, R_EAX, t0);
                 tcg_gen_br(label2);
                 gen_set_label(label1);
-                gen_op_mov_reg_T1(ot, rm);
+                gen_op_mov_reg_v(ot, rm, t1);
                 gen_set_label(label2);
             } else {
-                tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
-                gen_op_mov_reg_T0(ot, R_EAX);
+                tcg_gen_mov_tl(t1, t0);
+                gen_op_mov_reg_v(ot, R_EAX, t0);
                 gen_set_label(label1);
                 /* always store */
-                gen_op_st_T1_A0(ot + s->mem_index);
+                gen_op_st_v(ot + s->mem_index, t1, a0);
             }
-            tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T3);
+            tcg_gen_mov_tl(cpu_cc_src, t0);
+            tcg_gen_mov_tl(cpu_cc_dst, t2);
             s->cc_op = CC_OP_SUBB + ot;
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+            tcg_temp_free(t2);
+            tcg_temp_free(a0);
         }
         break;
     case 0x1c7: /* cmpxchg8b */
@@ -5456,10 +5481,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         (JCC_BE << 1),
                         (JCC_P << 1),
                     };
-                    op1 = fcmov_cc[op & 3] | ((op >> 3) & 1);
-                    gen_setcc(s, op1);
+                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                     l1 = gen_new_label();
-                    tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T[0], tcg_const_tl(0), l1);
+                    gen_jcc1(s, s->cc_op, op1, l1);
                     tcg_gen_helper_0_1(helper_fmov_ST0_STN, tcg_const_i32(opreg));
                     gen_set_label(l1);
                 }
@@ -5556,6 +5580,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
             gen_ins(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
         }
         break;
     case 0x6e: /* outsS */
@@ -5572,6 +5599,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
             gen_outs(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
         }
         break;
 
@@ -5588,9 +5618,15 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xe6:
     case 0xe7:
@@ -5604,10 +5640,16 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                      svm_is_rep(prefixes));
         gen_op_mov_TN_reg(ot, 1, R_EAX);
 
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xec:
     case 0xed:
@@ -5619,9 +5661,15 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_op_andl_T0_ffff();
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xee:
     case 0xef:
@@ -5635,10 +5683,16 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                      svm_is_rep(prefixes));
         gen_op_mov_TN_reg(ot, 1, R_EAX);
 
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
 
         /************************/
@@ -5696,8 +5750,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         val = 0;
         goto do_lret;
     case 0xcf: /* iret */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET))
-            break;
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
         if (!s->pe) {
             /* real mode */
             tcg_gen_helper_0_1(helper_iret_real, tcg_const_i32(s->dflag));
@@ -5805,25 +5858,26 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         {
             int l1;
+            TCGv t0;
+
             ot = dflag + OT_WORD;
             modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
+            t0 = tcg_temp_local_new(TCG_TYPE_TL);
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T1_A0(ot + s->mem_index);
+                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_mov_TN_reg(ot, 1, rm);
+                gen_op_mov_v_reg(ot, t0, rm);
             }
-            if (s->cc_op != CC_OP_DYNAMIC)
-                gen_op_set_cc_op(s->cc_op);
 #ifdef TARGET_X86_64
             if (ot == OT_LONG) {
                 /* XXX: specific Intel behaviour ? */
                 l1 = gen_new_label();
                 gen_jcc1(s, s->cc_op, b ^ 1, l1);
-                tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
+                tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET);
                 gen_set_label(l1);
                 tcg_gen_movi_tl(cpu_tmp0, 0);
                 tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET);
@@ -5832,17 +5886,17 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             {
                 l1 = gen_new_label();
                 gen_jcc1(s, s->cc_op, b ^ 1, l1);
-                gen_op_mov_reg_T1(ot, reg);
+                gen_op_mov_reg_v(ot, reg, t0);
                 gen_set_label(l1);
             }
+            tcg_temp_free(t0);
         }
         break;
 
         /************************/
         /* flags */
     case 0x9c: /* pushf */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF))
-            break;
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -5853,8 +5907,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x9d: /* popf */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF))
-            break;
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -6038,6 +6091,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x1bd: /* bsr */
         {
             int label1;
+            TCGv t0;
+
             ot = dflag + OT_WORD;
             modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -6045,17 +6100,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_extu(ot, cpu_T[0]);
             label1 = gen_new_label();
             tcg_gen_movi_tl(cpu_cc_dst, 0);
-            tcg_gen_brcond_tl(TCG_COND_EQ, cpu_T[0], tcg_const_tl(0), label1);
+            t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            tcg_gen_mov_tl(t0, cpu_T[0]);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
             if (b & 1) {
-                tcg_gen_helper_1_1(helper_bsr, cpu_T[0], cpu_T[0]);
+                tcg_gen_helper_1_1(helper_bsr, cpu_T[0], t0);
             } else {
-                tcg_gen_helper_1_1(helper_bsf, cpu_T[0], cpu_T[0]);
+                tcg_gen_helper_1_1(helper_bsf, cpu_T[0], t0);
             }
             gen_op_mov_reg_T0(ot, reg);
             tcg_gen_movi_tl(cpu_cc_dst, 1);
             gen_set_label(label1);
             tcg_gen_discard_tl(cpu_cc_src);
             s->cc_op = CC_OP_LOGICB + ot;
+            tcg_temp_free(t0);
         }
         break;
         /************************/
@@ -6133,14 +6191,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0xcc: /* int3 */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_SWINT))
-            break;
         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
         break;
     case 0xcd: /* int N */
         val = ldub_code(s->pc++);
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_SWINT))
-            break;
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -6150,16 +6204,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0xce: /* into */
         if (CODE64(s))
             goto illegal_op;
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_SWINT))
-            break;
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_helper_0_1(helper_into, tcg_const_i32(s->pc - pc_start));
         break;
     case 0xf1: /* icebp (undocumented, exits to external debugger) */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP))
-            break;
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
 #if 1
         gen_debug(s, pc_start - s->cs_base);
 #else
@@ -6287,11 +6338,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_compute_eflags(cpu_tmp0);
                 tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
                 if (b == 0) {
-                    tcg_gen_brcond_tl(TCG_COND_EQ, 
-                                      cpu_tmp0, tcg_const_tl(0), l1);
+                    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
                 } else {
-                    tcg_gen_brcond_tl(TCG_COND_NE, 
-                                      cpu_tmp0, tcg_const_tl(0), l1);
+                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
                 }
                 break;
             case 2: /* loop */
@@ -6319,25 +6368,25 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            int retval = 0;
+            if (s->cc_op != CC_OP_DYNAMIC)
+                gen_op_set_cc_op(s->cc_op);
+            gen_jmp_im(pc_start - s->cs_base);
             if (b & 2) {
-                retval = gen_svm_check_intercept_param(s, pc_start, SVM_EXIT_MSR, 0);
                 tcg_gen_helper_0_0(helper_rdmsr);
             } else {
-                retval = gen_svm_check_intercept_param(s, pc_start, SVM_EXIT_MSR, 1);
                 tcg_gen_helper_0_0(helper_wrmsr);
             }
-            if(retval)
-                gen_eob(s);
         }
         break;
     case 0x131: /* rdtsc */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_RDTSC))
-            break;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_helper_0_0(helper_rdtsc);
         break;
     case 0x133: /* rdpmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_helper_0_0(helper_rdpmc);
         break;
@@ -6400,20 +6449,19 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
 #endif
     case 0x1a2: /* cpuid */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_CPUID))
-            break;
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_helper_0_0(helper_cpuid);
         break;
     case 0xf4: /* hlt */
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_HLT))
-                break;
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            gen_jmp_im(s->pc - s->cs_base);
-            tcg_gen_helper_0_0(helper_hlt);
+            gen_jmp_im(pc_start - s->cs_base);
+            tcg_gen_helper_0_1(helper_hlt, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = 3;
         }
         break;
@@ -6425,8 +6473,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 0: /* sldt */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ))
-                break;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
             ot = OT_WORD;
             if (mod == 3)
@@ -6439,8 +6486,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE))
-                    break;
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -6450,8 +6496,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 1: /* str */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ))
-                break;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
             ot = OT_WORD;
             if (mod == 3)
@@ -6464,8 +6509,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE))
-                    break;
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -6498,8 +6542,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 0: /* sgdt */
             if (mod == 3)
                 goto illegal_op;
-            if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ))
-                break;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_T0_A0(OT_WORD + s->mem_index);
@@ -6516,8 +6559,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
                         s->cpl != 0)
                         goto illegal_op;
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_MONITOR))
-                        break;
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
                     gen_jmp_im(pc_start - s->cs_base);
 #ifdef TARGET_X86_64
                     if (s->aflag == 2) {
@@ -6540,18 +6583,15 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_op_set_cc_op(s->cc_op);
                         s->cc_op = CC_OP_DYNAMIC;
                     }
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_MWAIT))
-                        break;
-                    gen_jmp_im(s->pc - s->cs_base);
-                    tcg_gen_helper_0_0(helper_mwait);
+                    gen_jmp_im(pc_start - s->cs_base);
+                    tcg_gen_helper_0_1(helper_mwait, tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
                     break;
                 default:
                     goto illegal_op;
                 }
             } else { /* sidt */
-                if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ))
-                    break;
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
@@ -6565,52 +6605,90 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 2: /* lgdt */
         case 3: /* lidt */
             if (mod == 3) {
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
                 switch(rm) {
                 case 0: /* VMRUN */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_VMRUN))
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
-                    if (s->cc_op != CC_OP_DYNAMIC)
-                        gen_op_set_cc_op(s->cc_op);
-                    gen_jmp_im(s->pc - s->cs_base);
-                    tcg_gen_helper_0_0(helper_vmrun);
-                    s->cc_op = CC_OP_EFLAGS;
-                    gen_eob(s);
+                    } else {
+                        tcg_gen_helper_0_2(helper_vmrun, 
+                                           tcg_const_i32(s->aflag),
+                                           tcg_const_i32(s->pc - pc_start));
+                        tcg_gen_exit_tb(0);
+                        s->is_jmp = 3;
+                    }
                     break;
                 case 1: /* VMMCALL */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_VMMCALL))
-                         break;
-                    /* FIXME: cause #UD if hflags & SVM */
+                    if (!(s->flags & HF_SVME_MASK))
+                        goto illegal_op;
                     tcg_gen_helper_0_0(helper_vmmcall);
                     break;
                 case 2: /* VMLOAD */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_VMLOAD))
-                         break;
-                    tcg_gen_helper_0_0(helper_vmload);
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        tcg_gen_helper_0_1(helper_vmload,
+                                           tcg_const_i32(s->aflag));
+                    }
                     break;
                 case 3: /* VMSAVE */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_VMSAVE))
-                         break;
-                    tcg_gen_helper_0_0(helper_vmsave);
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        tcg_gen_helper_0_1(helper_vmsave,
+                                           tcg_const_i32(s->aflag));
+                    }
                     break;
                 case 4: /* STGI */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_STGI))
-                         break;
-                    tcg_gen_helper_0_0(helper_stgi);
+                    if ((!(s->flags & HF_SVME_MASK) &&
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                        !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        tcg_gen_helper_0_0(helper_stgi);
+                    }
                     break;
                 case 5: /* CLGI */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_CLGI))
-                         break;
-                    tcg_gen_helper_0_0(helper_clgi);
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        tcg_gen_helper_0_0(helper_clgi);
+                    }
                     break;
                 case 6: /* SKINIT */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_SKINIT))
-                         break;
+                    if ((!(s->flags & HF_SVME_MASK) && 
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                        !s->pe)
+                        goto illegal_op;
                     tcg_gen_helper_0_0(helper_skinit);
                     break;
                 case 7: /* INVLPGA */
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_INVLPGA))
-                         break;
-                    tcg_gen_helper_0_0(helper_invlpga);
+                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
+                        goto illegal_op;
+                    if (s->cpl != 0) {
+                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                        break;
+                    } else {
+                        tcg_gen_helper_0_1(helper_invlpga,
+                                           tcg_const_i32(s->aflag));
+                    }
                     break;
                 default:
                     goto illegal_op;
@@ -6618,9 +6696,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             } else if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                if (gen_svm_check_intercept(s, pc_start,
-                                            op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE))
-                    break;
+                gen_svm_check_intercept(s, pc_start,
+                                        op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T1_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
@@ -6637,8 +6714,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             }
             break;
         case 4: /* smsw */
-            if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0))
-                break;
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
             gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
             break;
@@ -6646,8 +6722,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0))
-                    break;
+                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
                 gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
                 tcg_gen_helper_0_1(helper_lmsw, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
@@ -6672,8 +6747,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         goto illegal_op;
                     }
                 } else {
-                    if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_INVLPG))
-                        break;
+                    if (s->cc_op != CC_OP_DYNAMIC)
+                        gen_op_set_cc_op(s->cc_op);
+                    gen_jmp_im(pc_start - s->cs_base);
                     gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                     tcg_gen_helper_0_1(helper_invlpg, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
@@ -6690,8 +6766,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            if (gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD))
-                break;
+            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
             /* nothing to do */
         }
         break;
@@ -6726,8 +6801,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 #endif
         {
             int label1;
+            TCGv t0, t1, t2;
+
             if (!s->pe || s->vm86)
                 goto illegal_op;
+            t0 = tcg_temp_local_new(TCG_TYPE_TL);
+            t1 = tcg_temp_local_new(TCG_TYPE_TL);
+            t2 = tcg_temp_local_new(TCG_TYPE_TL);
             ot = OT_WORD;
             modrm = ldub_code(s->pc++);
             reg = (modrm >> 3) & 7;
@@ -6735,55 +6815,61 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             rm = modrm & 7;
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                gen_op_ld_T0_A0(ot + s->mem_index);
+                gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
             } else {
-                gen_op_mov_TN_reg(ot, 0, rm);
+                gen_op_mov_v_reg(ot, t0, rm);
             }
-            gen_op_mov_TN_reg(ot, 1, reg);
-            tcg_gen_andi_tl(cpu_tmp0, cpu_T[0], 3);
-            tcg_gen_andi_tl(cpu_T[1], cpu_T[1], 3);
-            tcg_gen_movi_tl(cpu_T3, 0);
+            gen_op_mov_v_reg(ot, t1, reg);
+            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
+            tcg_gen_andi_tl(t1, t1, 3);
+            tcg_gen_movi_tl(t2, 0);
             label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, cpu_T[1], label1);
-            tcg_gen_andi_tl(cpu_T[0], cpu_T[0], ~3);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-            tcg_gen_movi_tl(cpu_T3, CC_Z);
+            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
+            tcg_gen_andi_tl(t0, t0, ~3);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_movi_tl(t2, CC_Z);
             gen_set_label(label1);
             if (mod != 3) {
-                gen_op_st_T0_A0(ot + s->mem_index);
+                gen_op_st_v(ot + s->mem_index, t0, cpu_A0);
             } else {
-                gen_op_mov_reg_T0(ot, rm);
+                gen_op_mov_reg_v(ot, rm, t0);
             }
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_compute_eflags(cpu_cc_src);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
-            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T3);
+            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
             s->cc_op = CC_OP_EFLAGS;
+            tcg_temp_free(t0);
+            tcg_temp_free(t1);
+            tcg_temp_free(t2);
         }
         break;
     case 0x102: /* lar */
     case 0x103: /* lsl */
         {
             int label1;
+            TCGv t0;
             if (!s->pe || s->vm86)
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
             modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            t0 = tcg_temp_local_new(TCG_TYPE_TL);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             if (b == 0x102)
-                tcg_gen_helper_1_1(helper_lar, cpu_T[0], cpu_T[0]);
+                tcg_gen_helper_1_1(helper_lar, t0, cpu_T[0]);
             else
-                tcg_gen_helper_1_1(helper_lsl, cpu_T[0], cpu_T[0]);
+                tcg_gen_helper_1_1(helper_lsl, t0, cpu_T[0]);
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
-            tcg_gen_brcond_tl(TCG_COND_EQ, cpu_tmp0, tcg_const_tl(0), label1);
-            gen_op_mov_reg_T0(ot, reg);
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
+            gen_op_mov_reg_v(ot, reg, t0);
             gen_set_label(label1);
             s->cc_op = CC_OP_EFLAGS;
+            tcg_temp_free(t0);
         }
         break;
     case 0x118:
@@ -6829,21 +6915,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 3:
             case 4:
             case 8:
+                if (s->cc_op != CC_OP_DYNAMIC)
+                    gen_op_set_cc_op(s->cc_op);
+                gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
-                    gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0 + reg);
                     gen_op_mov_TN_reg(ot, 0, rm);
-                    tcg_gen_helper_0_2(helper_movl_crN_T0, 
+                    tcg_gen_helper_0_2(helper_write_crN, 
                                        tcg_const_i32(reg), cpu_T[0]);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
-                    gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0 + reg);
-#if !defined(CONFIG_USER_ONLY)
-                    if (reg == 8)
-                        tcg_gen_helper_1_0(helper_movtl_T0_cr8, cpu_T[0]);
-                    else
-#endif
-                        tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[reg]));
+                    tcg_gen_helper_1_1(helper_read_crN, 
+                                       cpu_T[0], tcg_const_i32(reg));
                     gen_op_mov_reg_T0(ot, rm);
                 }
                 break;
@@ -6991,8 +7074,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
-        if (gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM))
-            break;
+        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         if (s->cc_op != CC_OP_DYNAMIC) {
@@ -7030,17 +7112,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     return s->pc;
 }
 
-static void tcg_macro_func(TCGContext *s, int macro_id, const int *dead_args)
-{
-    switch(macro_id) {
-#ifdef MACRO_TEST
-    case MACRO_TEST:
-        tcg_gen_helper_0_1(helper_divl_EAX_T0, cpu_T[0]);
-        break;
-#endif
-    }
-}
-
 void optimize_flags_init(void)
 {
 #if TCG_TARGET_REG_BITS == 32
@@ -7048,33 +7119,20 @@ void optimize_flags_init(void)
 #else
     assert(sizeof(CCTable) == (1 << 4));
 #endif
-    tcg_set_macro_func(&tcg_ctx, tcg_macro_func);
-
     cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
-#if TARGET_LONG_BITS > HOST_LONG_BITS
-    cpu_T[0] = tcg_global_mem_new(TCG_TYPE_TL, 
-                                  TCG_AREG0, offsetof(CPUState, t0), "T0");
-    cpu_T[1] = tcg_global_mem_new(TCG_TYPE_TL,
-                                  TCG_AREG0, offsetof(CPUState, t1), "T1");
-    cpu_A0 = tcg_global_mem_new(TCG_TYPE_TL,
-                                TCG_AREG0, offsetof(CPUState, t2), "A0");
-#else
-    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG1, "T0");
-    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG2, "T1");
-    cpu_A0 = tcg_global_reg_new(TCG_TYPE_TL, TCG_AREG3, "A0");
-#endif
-    cpu_T3 = tcg_global_mem_new(TCG_TYPE_TL,
-                                TCG_AREG0, offsetof(CPUState, t3), "T3");
-#if defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS)
-    /* XXX: must be suppressed once there are less fixed registers */
-    cpu_tmp1_i64 = tcg_global_reg2_new_hack(TCG_TYPE_I64, TCG_AREG1, TCG_AREG2, "tmp1");
-#endif
     cpu_cc_op = tcg_global_mem_new(TCG_TYPE_I32,
                                    TCG_AREG0, offsetof(CPUState, cc_op), "cc_op");
     cpu_cc_src = tcg_global_mem_new(TCG_TYPE_TL,
                                     TCG_AREG0, offsetof(CPUState, cc_src), "cc_src");
     cpu_cc_dst = tcg_global_mem_new(TCG_TYPE_TL,
                                     TCG_AREG0, offsetof(CPUState, cc_dst), "cc_dst");
+    cpu_cc_tmp = tcg_global_mem_new(TCG_TYPE_TL,
+                                    TCG_AREG0, offsetof(CPUState, cc_tmp), "cc_tmp");
+
+    /* register helpers */
+
+#define DEF_HELPER(ret, name, params) tcg_register_helper(name, #name);
+#include "helper.h"
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -7091,6 +7149,8 @@ static inline int gen_intermediate_code_internal(CPUState *env,
     uint64_t flags;
     target_ulong pc_start;
     target_ulong cs_base;
+    int num_insns;
+    int max_insns;
 
     /* generate intermediate code */
     pc_start = tb->pc;
@@ -7141,10 +7201,13 @@ static inline int gen_intermediate_code_internal(CPUState *env,
         printf("ERROR addseg\n");
 #endif
 
+    cpu_T[0] = tcg_temp_new(TCG_TYPE_TL);
+    cpu_T[1] = tcg_temp_new(TCG_TYPE_TL);
+    cpu_A0 = tcg_temp_new(TCG_TYPE_TL);
+    cpu_T3 = tcg_temp_new(TCG_TYPE_TL);
+
     cpu_tmp0 = tcg_temp_new(TCG_TYPE_TL);
-#if !(defined(__i386__) && (TARGET_LONG_BITS <= HOST_LONG_BITS))
     cpu_tmp1_i64 = tcg_temp_new(TCG_TYPE_I64);
-#endif
     cpu_tmp2_i32 = tcg_temp_new(TCG_TYPE_I32);
     cpu_tmp3_i32 = tcg_temp_new(TCG_TYPE_I32);
     cpu_tmp4 = tcg_temp_new(TCG_TYPE_TL);
@@ -7158,7 +7221,12 @@ static inline int gen_intermediate_code_internal(CPUState *env,
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
     lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
 
+    gen_icount_start();
     for(;;) {
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
@@ -7178,8 +7246,13 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
+
         pc_ptr = disas_insn(dc, pc_ptr);
+        num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
@@ -7189,20 +7262,23 @@ static inline int gen_intermediate_code_internal(CPUState *env,
            the flag and abort the translation to give the irqs a
            chance to happen */
         if (dc->tf || dc->singlestep_enabled ||
-            (flags & HF_INHIBIT_IRQ_MASK) ||
-            (cflags & CF_SINGLE_INSN)) {
+            (flags & HF_INHIBIT_IRQ_MASK)) {
             gen_jmp_im(pc_ptr - dc->cs_base);
             gen_eob(dc);
             break;
         }
         /* if too long translation, stop generation too */
         if (gen_opc_ptr >= gen_opc_end ||
-            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32)) {
+            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
+            num_insns >= max_insns) {
             gen_jmp_im(pc_ptr - dc->cs_base);
             gen_eob(dc);
             break;
         }
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
@@ -7228,16 +7304,13 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             disas_flags = !dc->code32;
        target_disas(logfile, pc_start, pc_ptr - pc_start, disas_flags);
         fprintf(logfile, "\n");
-        if (loglevel & CPU_LOG_TB_OP_OPT) {
-            fprintf(logfile, "OP before opt:\n");
-            tcg_dump_ops(&tcg_ctx, logfile);
-            fprintf(logfile, "\n");
-        }
     }
 #endif
 
-    if (!search_pc)
+    if (!search_pc) {
         tb->size = pc_ptr - pc_start;
+        tb->icount = num_insns;
+    }
     return 0;
 }