diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6b109e853b232046464d78c1c27f3ae9d9bfba8c..524a0b480e3ea899e412bea015db78be1f9f1307 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -72,7 +72,6 @@ static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
-static TCGv cpu_tmp5;
 
 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
 
@@ -239,10 +238,18 @@ static void set_cc_op(DisasContext *s, CCOp op)
         tcg_gen_discard_tl(cpu_cc_srcT);
     }
 
+    if (op == CC_OP_DYNAMIC) {
+        /* The DYNAMIC setting is translator only, and should never be
+           stored.  Thus we always consider it clean.  */
+        s->cc_op_dirty = false;
+    } else {
+        /* Discard any computed CC_OP value (see shifts).  */
+        if (s->cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_discard_i32(cpu_cc_op);
+        }
+        s->cc_op_dirty = true;
+    }
     s->cc_op = op;
-    /* The DYNAMIC setting is translator only, and should never be
-       stored.  Thus we always consider it clean.  */
-    s->cc_op_dirty = (op != CC_OP_DYNAMIC);
 }
 
 static void gen_update_cc_op(DisasContext *s)
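The reshuffled set_cc_op keeps the same invariant as before, namely that CC_OP_DYNAMIC is a translator-only value that is never written back, but it now also discards a stale cpu_cc_op so TCG's liveness pass can delete the dead stores that produced it. The bookkeeping, modeled loosely in plain C (the struct and names are invented for illustration, not QEMU API):

    #include <stdbool.h>

    enum { CC_OP_DYNAMIC = -1 };   /* placeholder value for the model */

    struct flag_state {
        int  cc_op;        /* statically known flags op, or DYNAMIC */
        bool cc_op_dirty;  /* must cc_op still be flushed to the env? */
    };

    static void model_set_cc_op(struct flag_state *s, int op)
    {
        if (op == CC_OP_DYNAMIC) {
            s->cc_op_dirty = false;   /* never stored, always clean */
        } else {
            /* any runtime cpu_cc_op value computed earlier is now dead */
            s->cc_op_dirty = true;
        }
        s->cc_op = op;
    }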
@@ -1569,12 +1576,55 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
+static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
+                            TCGv count, bool is_right)
+{
+    TCGv_i32 z32, s32, oldop;
+    TCGv z_tl;
+
+    /* Store the results into the CC variables.  If we know that the
+       variable must be dead, store unconditionally.  Otherwise we
+       must take care not to disrupt the current contents.  */
+    z_tl = tcg_const_tl(0);
+    if (cc_op_live[s->cc_op] & USES_CC_DST) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
+                           result, cpu_cc_dst);
+    } else {
+        tcg_gen_mov_tl(cpu_cc_dst, result);
+    }
+    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
+                           shm1, cpu_cc_src);
+    } else {
+        tcg_gen_mov_tl(cpu_cc_src, shm1);
+    }
+    tcg_temp_free(z_tl);
+
+    /* Get the two potential CC_OP values into temporaries.  */
+    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+    if (s->cc_op == CC_OP_DYNAMIC) {
+        oldop = cpu_cc_op;
+    } else {
+        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
+        oldop = cpu_tmp3_i32;
+    }
+
+    /* Conditionally store the CC_OP value.  */
+    z32 = tcg_const_i32(0);
+    s32 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(s32, count);
+    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+    tcg_temp_free_i32(z32);
+    tcg_temp_free_i32(s32);
+
+    /* The CC_OP value is no longer predictable.  */
+    set_cc_op(s, CC_OP_DYNAMIC);
+}
+
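For reference outside the patch: x86 leaves the flags untouched when the effective shift count is zero, and gen_shift_flags gets that behaviour branch-free with movcond instead of the old brcond labels. A rough C model of the data flow (illustrative only; the struct and helper names are made up, not QEMU API):

    #include <stdint.h>

    typedef struct {
        uint64_t cc_dst;   /* shift result, feeds ZF/SF/PF */
        uint64_t cc_src;   /* value shifted by count-1, feeds CF/OF */
        uint64_t cc_op;    /* which flag recipe to apply later */
    } Flags;

    /* movcond semantics: pick a when count != 0, else keep b */
    static uint64_t movcond_ne(uint64_t count, uint64_t a, uint64_t b)
    {
        return count != 0 ? a : b;
    }

    static void shift_flags(Flags *f, uint64_t result, uint64_t shm1,
                            uint64_t count, uint64_t new_op)
    {
        /* a shift by zero must leave all flag state untouched */
        f->cc_dst = movcond_ne(count, result, f->cc_dst);
        f->cc_src = movcond_ne(count, shm1,   f->cc_src);
        f->cc_op  = movcond_ne(count, new_op, f->cc_op);
    }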
 static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
                             int is_right, int is_arith)
 {
     target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
-    TCGv_i32 z32, s32, oldop;
-    TCGv z_tl;
 
     /* load */
     if (op1 == OR_TMP0) {
@@ -1608,43 +1658,7 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_T0(ot, op1);
     }
 
-    /* Store the results into the CC variables.  If we know that the
-       variable must be dead, store unconditionally.  Otherwise we'll
-       need to not disrupt the current contents.  */
-    z_tl = tcg_const_tl(0);
-    if (cc_op_live[s->cc_op] & USES_CC_DST) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, cpu_T[1], z_tl,
-                           cpu_T[0], cpu_cc_dst);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-    }
-    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, cpu_T[1], z_tl,
-                           cpu_tmp0, cpu_cc_src);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0);
-    }
-    tcg_temp_free(z_tl);
-
-    /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
-    if (s->cc_op == CC_OP_DYNAMIC) {
-        oldop = cpu_cc_op;
-    } else {
-        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
-        oldop = cpu_tmp3_i32;
-    }
-
-    /* Conditionally store the CC_OP value.  */
-    z32 = tcg_const_i32(0);
-    s32 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(s32, cpu_T[1]);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
-    tcg_temp_free_i32(z32);
-    tcg_temp_free_i32(s32);
-
-    /* The CC_OP value is no longer predictable.  */
-    set_cc_op(s, CC_OP_DYNAMIC);
+    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
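gen_shift_rm_T1 now passes cpu_tmp0, the operand shifted by count-1, as the shm1 argument; the lazy flag evaluator can later pull CF out of one end of that intermediate. A hedged sketch of the idea for 32-bit shifts (plain C, not the translator's code):

    #include <stdint.h>

    /* For SHR with count in 1..31, CF is the last bit shifted out,
       i.e. the low bit of the value shifted by count-1.  */
    static int shr32_cf(uint32_t val, unsigned count)
    {
        uint32_t shm1 = val >> (count - 1);
        return shm1 & 1;
    }

    /* For SHL the same intermediate works: CF is the top bit of
       (val << (count - 1)).  */
    static int shl32_cf(uint32_t val, unsigned count)
    {
        uint32_t shm1 = val << (count - 1);
        return shm1 >> 31;
    }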
@@ -1761,6 +1775,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
     if (is_right) {
         tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
         tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
     } else {
         tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
         tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
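The added andi in the right-rotate path masks cpu_cc_dst down to a single bit: after ROR, CF is the most significant bit of the result, and OF (defined for count == 1) is the XOR of the two top bits, which cpu_cc_src2 captures. A small C illustration of those flag definitions, assuming 32-bit operands:

    #include <stdint.h>

    static uint32_t ror32(uint32_t x, unsigned c)
    {
        c &= 31;
        return c ? (x >> c) | (x << (32 - c)) : x;
    }

    static void ror32_flags(uint32_t result, int *cf, int *of)
    {
        *cf = (result >> 31) & 1;                    /* MSB of result */
        *of = ((result >> 30) ^ (result >> 31)) & 1; /* top two bits */
    }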
@@ -1923,128 +1938,88 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 
 /* XXX: add faster immediate case */
 static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
-                             int is_right, TCGv count)
+                             bool is_right, TCGv count_in)
 {
-    int label1, label2, data_bits;
-    target_ulong mask;
-    TCGv t0, t1, t2, a0;
-
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-    t2 = tcg_temp_local_new();
-    a0 = tcg_temp_local_new();
-
-    if (ot == OT_QUAD)
-        mask = 0x3f;
-    else
-        mask = 0x1f;
+    target_ulong mask = (ot == OT_QUAD ? 63 : 31);
+    TCGv count;
 
     /* load */
     if (op1 == OR_TMP0) {
-        tcg_gen_mov_tl(a0, cpu_A0);
-        gen_op_ld_v(ot + s->mem_index, t0, a0);
+        gen_op_ld_T0_A0(ot + s->mem_index);
     } else {
-        gen_op_mov_v_reg(ot, t0, op1);
+        gen_op_mov_TN_reg(ot, 0, op1);
     }
 
-    tcg_gen_andi_tl(t2, count, mask);
-    tcg_gen_mov_tl(t1, cpu_T[1]);
+    count = tcg_temp_new();
+    tcg_gen_andi_tl(count, count_in, mask);
 
-    /* Must test zero case to avoid using undefined behaviour in TCG
-       shifts. */
-    label1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-    
-    tcg_gen_addi_tl(cpu_tmp5, t2, -1);
-    if (ot == OT_WORD) {
-        /* Note: we implement the Intel behaviour for shift count > 16 */
+    switch (ot) {
+    case OT_WORD:
+        /* Note: we implement the Intel behaviour for shift count > 16.
+           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
+           portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_andi_tl(t0, t0, 0xffff);
-            tcg_gen_shli_tl(cpu_tmp0, t1, 16);
-            tcg_gen_or_tl(t0, t0, cpu_tmp0);
-            tcg_gen_ext32u_tl(t0, t0);
-
-            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-            
-            /* only needed if count > 16, but a test would complicate */
-            tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
-            tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
-
-            tcg_gen_shr_tl(t0, t0, t2);
-
-            tcg_gen_or_tl(t0, t0, cpu_tmp0);
+            tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
+            tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
+            tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
         } else {
-            /* XXX: not optimal */
-            tcg_gen_andi_tl(t0, t0, 0xffff);
-            tcg_gen_shli_tl(t1, t1, 16);
-            tcg_gen_or_tl(t1, t1, t0);
-            tcg_gen_ext32u_tl(t1, t1);
-            
-            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-            tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
-            tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
-            tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
-
-            tcg_gen_shl_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
-            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
         }
-    } else {
-        data_bits = 8 << ot;
+        /* FALLTHRU */
+#ifdef TARGET_X86_64
+    case OT_LONG:
+        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
+        tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(t0, t0);
-
-            tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
+        } else {
+            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
+            tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+            tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
+        }
+        break;
+#endif
+    default:
+        tcg_gen_subi_tl(cpu_tmp0, count, 1);
+        if (is_right) {
+            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
 
-            tcg_gen_shr_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
-            tcg_gen_shl_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
-            
+            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
         } else {
-            if (ot == OT_LONG)
-                tcg_gen_ext32u_tl(t1, t1);
-
-            tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-            
-            tcg_gen_shl_tl(t0, t0, t2);
-            tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
-            tcg_gen_shr_tl(t1, t1, cpu_tmp5);
-            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            if (ot == OT_WORD) {
+                /* Only needed if count > 16, for Intel behaviour.  */
+                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
+                tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
+                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+            }
+
+            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
+            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
         }
+        tcg_gen_movi_tl(cpu_tmp4, 0);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
+                           cpu_tmp4, cpu_T[1]);
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        break;
     }
-    tcg_gen_mov_tl(t1, cpu_tmp4);
 
-    gen_set_label(label1);
     /* store */
     if (op1 == OR_TMP0) {
-        gen_op_st_v(ot + s->mem_index, t0, a0);
-    } else {
-        gen_op_mov_reg_v(ot, op1, t0);
-    }
-    
-    /* Update eflags data because we cannot predict flags afterward.  */
-    gen_update_cc_op(s);
-    set_cc_op(s, CC_OP_DYNAMIC);
-
-    label2 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
-
-    tcg_gen_mov_tl(cpu_cc_src, t1);
-    tcg_gen_mov_tl(cpu_cc_dst, t0);
-    if (is_right) {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+        gen_op_st_T0_A0(ot + s->mem_index);
     } else {
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+        gen_op_mov_reg_T0(ot, op1);
     }
-    gen_set_label(label2);
 
-    tcg_temp_free(t0);
-    tcg_temp_free(t1);
-    tcg_temp_free(t2);
-    tcg_temp_free(a0);
+    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
+    tcg_temp_free(count);
 }
 
 static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
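The rewrite above removes the branches by widening: for 16-bit operands the two inputs are packed with deposit, and for 32-bit operands they are concatenated into a 64-bit value so a single wide shift produces both the result and the count-1 intermediate. The same trick in portable C for a 32-bit SHRD (a sketch, not the QEMU helper):

    #include <stdint.h>

    /* shrd: shift the 64-bit value hi:lo right by count (1..31) and
       return the low 32 bits, as a single wide shift.  */
    static uint32_t shrd32(uint32_t lo, uint32_t hi, unsigned count)
    {
        uint64_t wide = ((uint64_t)hi << 32) | lo;
        return (uint32_t)(wide >> count);
    }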
@@ -3173,6 +3148,9 @@ struct SSEOpHelper_eppi {
 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
+        CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
 
 static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x00] = SSSE3_OP(pshufb),
@@ -3221,6 +3199,11 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x3f] = SSE41_OP(pmaxud),
     [0x40] = SSE41_OP(pmulld),
     [0x41] = SSE41_OP(phminposuw),
+    [0xdb] = AESNI_OP(aesimc),
+    [0xdc] = AESNI_OP(aesenc),
+    [0xdd] = AESNI_OP(aesenclast),
+    [0xde] = AESNI_OP(aesdec),
+    [0xdf] = AESNI_OP(aesdeclast),
 };
 
 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
@@ -3242,10 +3225,12 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
     [0x40] = SSE41_OP(dpps),
     [0x41] = SSE41_OP(dppd),
     [0x42] = SSE41_OP(mpsadbw),
+    [0x44] = PCLMULQDQ_OP(pclmulqdq),
     [0x60] = SSE42_OP(pcmpestrm),
     [0x61] = SSE42_OP(pcmpestri),
     [0x62] = SSE42_OP(pcmpistrm),
     [0x63] = SSE42_OP(pcmpistri),
+    [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
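For context, PCLMULQDQ multiplies two 64-bit polynomials over GF(2), accumulating partial products with XOR instead of addition, into a 128-bit result; the new table entries route opcode 0x44 (and the AESNI opcodes above) to xmm helpers gated on the corresponding CPUID bits. A reference model of one carry-less multiply, purely as a sketch; the real helper operates on XMM register halves selected by the immediate byte:

    #include <stdint.h>

    /* Carry-less multiply of two 64-bit values; hi:lo is the
       128-bit product.  */
    static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
    {
        uint64_t l = 0, h = 0;
        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                l ^= a << i;
                h ^= i ? a >> (64 - i) : 0;
            }
        }
        *lo = l;
        *hi = h;
    }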
@@ -4137,31 +4122,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
-                    TCGv_i64 t0, t1;
                 default:
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-#ifdef TARGET_X86_64
-                    tcg_gen_ext32u_i64(t0, cpu_T[0]);
-                    tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
-#else
-                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]);
-#endif
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_tl(cpu_T[0], t0);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_tl(cpu_T[1], t0);
-                    tcg_temp_free_i64(t0);
-                    tcg_temp_free_i64(t1);
-                    gen_op_mov_reg_T0(OT_LONG, s->vex_v);
-                    gen_op_mov_reg_T1(OT_LONG, reg);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                      cpu_tmp2_i32, cpu_tmp3_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
                     break;
 #ifdef TARGET_X86_64
                 case OT_QUAD:
-                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
-                    tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
-                    gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                    tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
+                                      cpu_T[0], cpu_regs[R_EDX]);
                     break;
 #endif
                 }
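tcg_gen_mulu2_* produces both halves of a widening unsigned multiply in one op, which is exactly what MULX needs, so the open-coded 64-bit arithmetic and the umulh helper can go. Its semantics in plain C for 32-bit inputs (illustrative):

    #include <stdint.h>

    static void mulu2_32(uint32_t a, uint32_t b, uint32_t *lo, uint32_t *hi)
    {
        uint64_t p = (uint64_t)a * b;
        *lo = (uint32_t)p;
        *hi = (uint32_t)(p >> 32);
    }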
@@ -4208,7 +4180,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                     goto illegal_op;
                 } else {
-                    TCGv carry_in, carry_out;
+                    TCGv carry_in, carry_out, zero;
                     int end_op;
 
                     ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
@@ -4239,7 +4211,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         carry_in = carry_out;
                         break;
                     default:
-                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX);
+                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                         break;
                     }
                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
@@ -4268,18 +4240,16 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
-                        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
-                        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
-                                           cpu_T[0], cpu_regs[reg]);
-                        tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
-                        tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
-                                           cpu_regs[reg], cpu_T[0]);
-                        tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+                        zero = tcg_const_tl(0);
+                        tcg_gen_add2_tl(cpu_T[0], carry_out,
+                                        cpu_T[0], zero,
+                                        carry_in, zero);
+                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
+                                        cpu_regs[reg], carry_out,
+                                        cpu_T[0], zero);
+                        tcg_temp_free(zero);
                         break;
                     }
-                    /* We began with all flags computed to CC_SRC, and we
-                       have now placed the carry-out in CC_DST.  All that
-                       is left is to record the CC_OP.  */
                     set_cc_op(s, end_op);
                 }
                 break;
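The add2 op adds two double-word values; here it is used twice with zeroed high halves, so the "high" output of each add is exactly the carry-out of the corresponding single-word add, with no setcond/or sequence. A C model of the chain for ADCX-style semantics (a sketch, assuming 64-bit words):

    #include <stdint.h>

    /* (hi:lo) = (ah:al) + (bh:bl) */
    static void add2(uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh,
                     uint64_t *lo, uint64_t *hi)
    {
        uint64_t l = al + bl;
        *lo = l;
        *hi = ah + bh + (l < al);   /* carry from the low half */
    }

    /* dest += src + carry_in, yielding carry_out, branch-free */
    static uint64_t adcx(uint64_t *dest, uint64_t src, uint64_t carry_in)
    {
        uint64_t t, carry;
        add2(src, 0, carry_in, 0, &t, &carry);   /* t = src + carry_in */
        add2(*dest, carry, t, 0, dest, &carry);  /* dest += t          */
        return carry;
    }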
@@ -4445,9 +4415,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (mod == 3)
                         gen_op_mov_TN_reg(OT_LONG, 0, rm);
                     else
-                        tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+                        tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0,
                                         (s->mem_index >> 2) - 1);
-                    tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
                                             xmm_regs[reg].XMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
@@ -4787,7 +4757,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
             s->pc++;
 
-            /* 4.1.1-4.1.3: No preceeding lock, 66, f2, f3, or rex prefixes. */
+            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
                             | PREFIX_LOCK | PREFIX_DATA)) {
                 goto illegal_op;
@@ -5060,39 +5030,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 break;
             default:
             case OT_LONG:
-#ifdef TARGET_X86_64
-                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
-                gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                }
-#endif
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                  cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
+                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+                                  cpu_T[0], cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
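After a one-operand MUL, CF and OF are set exactly when the upper half of the product is nonzero; storing the two halves of the mulu2 result in CC_DST and CC_SRC lets the lazy flag evaluator derive that later. In C terms, for the 32-bit case (sketch):

    #include <stdint.h>

    static int mul32_cf_of(uint32_t a, uint32_t b)
    {
        uint64_t p = (uint64_t)a * b;
        uint32_t hi = (uint32_t)(p >> 32);   /* goes to EDX and CC_SRC */
        return hi != 0;                      /* CF = OF for MUL */
    }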
@@ -5128,41 +5081,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 break;
             default:
             case OT_LONG:
-#ifdef TARGET_X86_64
-                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
-                gen_op_mov_reg_T0(OT_LONG, R_EDX);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
-                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
-                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                }
-#endif
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                                  cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
                 set_cc_op(s, CC_OP_MULL);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
+                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+                                  cpu_T[0], cpu_regs[R_EAX]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
                 break;
 #endif
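For signed IMUL the lazy flags need "did the product fit in the low half"; the muls2 path encodes that as CC_SRC = sign_extend(lo) - hi, which is zero exactly when no overflow occurred. The same condition checked in C for the 32-bit case (sketch):

    #include <stdint.h>

    static int imul32_cf_of(int32_t a, int32_t b)
    {
        int64_t p = (int64_t)a * b;
        int32_t lo = (int32_t)p;
        int32_t hi = (int32_t)(p >> 32);
        int32_t sign = lo < 0 ? -1 : 0;   /* sign-extension of lo */
        return sign != hi;                /* CF = OF for IMUL */
    }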
@@ -5417,37 +5354,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             gen_op_mov_TN_reg(ot, 1, reg);
         }
-
-#ifdef TARGET_X86_64
-        if (ot == OT_QUAD) {
-            gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
-        } else
-#endif
-        if (ot == OT_LONG) {
+        switch (ot) {
 #ifdef TARGET_X86_64
-                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-#else
-                {
-                    TCGv_i64 t0, t1;
-                    t0 = tcg_temp_new_i64();
-                    t1 = tcg_temp_new_i64();
-                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
-                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
-                    tcg_gen_mul_i64(t0, t0, t1);
-                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
-                    tcg_gen_shri_i64(t0, t0, 32);
-                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
-                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
-                }
+        case OT_QUAD:
+            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
+            break;
 #endif
-        } else {
+        case OT_LONG:
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+                              cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+            break;
+        default:
             tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
             tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
             /* XXX: use 32 bit mul which could be faster */
@@ -5455,8 +5382,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
             tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
             tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+            gen_op_mov_reg_T0(ot, reg);
+            break;
         }
-        gen_op_mov_reg_T0(ot, reg);
         set_cc_op(s, CC_OP_MULB + ot);
         break;
     case 0x1c0:
@@ -8362,11 +8290,11 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     if (flags & HF_SOFTMMU_MASK) {
         dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
     }
-    dc->cpuid_features = env->cpuid_features;
-    dc->cpuid_ext_features = env->cpuid_ext_features;
-    dc->cpuid_ext2_features = env->cpuid_ext2_features;
-    dc->cpuid_ext3_features = env->cpuid_ext3_features;
-    dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
+    dc->cpuid_features = env->features[FEAT_1_EDX];
+    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
+    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
+    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
+    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
@@ -8393,7 +8321,6 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     cpu_tmp2_i32 = tcg_temp_new_i32();
     cpu_tmp3_i32 = tcg_temp_new_i32();
     cpu_tmp4 = tcg_temp_new();
-    cpu_tmp5 = tcg_temp_new();
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
     cpu_cc_srcT = tcg_temp_local_new();
@@ -8408,7 +8335,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
 
-    gen_icount_start();
+    gen_tb_start();
     for(;;) {
         if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
@@ -8466,7 +8393,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
     }
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
-    gen_icount_end(tb, num_insns);
+    gen_tb_end(tb, num_insns);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {