git.proxmox.com Git - mirror_qemu.git/blobdiff - target-i386/translate.c
Fix division by zero handling, by Joris van Rantwijk.
[mirror_qemu.git] / target-i386 / translate.c
index 46bf933c9c645a1d16ec07b18e38546c520eea97..393db0d65e369ca049d207d938c674506fe5dc98 100644 (file)
@@ -100,6 +100,7 @@ typedef struct DisasContext {
     int popl_esp_hack; /* for correct popl with esp base handling */
     int rip_offset; /* only used in x86_64, but left for simplicity */
     int cpuid_features;
+    int cpuid_ext_features;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -600,12 +601,14 @@ static GenOpFunc *gen_op_shift_mem_T0_T1_cc[3 * 4][8] = {
     {\
         gen_op_shldw ## SUFFIX ## _T0_T1_ ## op ## _cc,\
         gen_op_shrdw ## SUFFIX ## _T0_T1_ ## op ## _cc,\
-    },\
+     },\
     {\
         gen_op_shldl ## SUFFIX ## _T0_T1_ ## op ## _cc,\
         gen_op_shrdl ## SUFFIX ## _T0_T1_ ## op ## _cc,\
     },\
     {\
+X86_64_DEF(gen_op_shldq ## SUFFIX ## _T0_T1_ ## op ## _cc,\
+           gen_op_shrdq ## SUFFIX ## _T0_T1_ ## op ## _cc,)\
     },
 
 static GenOpFunc1 *gen_op_shiftd_T0_T1_im_cc[4][2] = {
@@ -1612,6 +1615,56 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
     *offset_ptr = disp;
 }
 
+static void gen_nop_modrm(DisasContext *s, int modrm)
+{
+    int mod, rm, base, code;
+
+    mod = (modrm >> 6) & 3;
+    if (mod == 3)
+        return;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+
+        base = rm;
+        
+        if (base == 4) {
+            code = ldub_code(s->pc++);
+            base = (code & 7);
+        }
+        
+        switch (mod) {
+        case 0:
+            if (base == 5) {
+                s->pc += 4;
+            }
+            break;
+        case 1:
+            s->pc++;
+            break;
+        default:
+        case 2:
+            s->pc += 4;
+            break;
+        }
+    } else {
+        switch (mod) {
+        case 0:
+            if (rm == 6) {
+                s->pc += 2;
+            }
+            break;
+        case 1:
+            s->pc++;
+            break;
+        default:
+        case 2:
+            s->pc += 2;
+            break;
+        }
+    }
+}
+
 /* used for LEA and MOV AX, mem */
 static void gen_add_A0_ds_seg(DisasContext *s)
 {
@@ -1625,7 +1678,14 @@ static void gen_add_A0_ds_seg(DisasContext *s)
         override = R_DS;
     }
     if (must_add_seg) {
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+#ifdef TARGET_X86_64
+        if (CODE64(s)) {
+            gen_op_addq_A0_seg(offsetof(CPUX86State,segs[override].base));
+        } else 
+#endif
+        {
+            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[override].base));
+        }
     }
 }
 
@@ -1691,6 +1751,31 @@ static inline int insn_const_size(unsigned int ot)
         return 4;
 }
 
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{
+    TranslationBlock *tb;
+    target_ulong pc;
+
+    pc = s->cs_base + eip;
+    tb = s->tb;
+    /* NOTE: we handle the case where the TB spans two pages here */
+    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
+        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
+        /* jump to same page: we can use a direct jump */
+        if (tb_num == 0)
+            gen_op_goto_tb0(TBPARAM(tb));
+        else
+            gen_op_goto_tb1(TBPARAM(tb));
+        gen_jmp_im(eip);
+        gen_op_movl_T0_im((long)tb + tb_num);
+        gen_op_exit_tb();
+    } else {
+        /* jump to another page: currently not optimized */
+        gen_jmp_im(eip);
+        gen_eob(s);
+    }
+}
+
 static inline void gen_jcc(DisasContext *s, int b, 
                            target_ulong val, target_ulong next_eip)
 {
@@ -1770,8 +1855,10 @@ static inline void gen_jcc(DisasContext *s, int b,
             break;
         }
 
-        if (s->cc_op != CC_OP_DYNAMIC)
+        if (s->cc_op != CC_OP_DYNAMIC) {
             gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
 
         if (!func) {
             gen_setcc_slow[jcc_op]();
@@ -1788,16 +1875,10 @@ static inline void gen_jcc(DisasContext *s, int b,
         l1 = gen_new_label();
         func(l1);
 
-        gen_op_goto_tb0(TBPARAM(tb));
-        gen_jmp_im(next_eip);
-        gen_op_movl_T0_im((long)tb + 0);
-        gen_op_exit_tb();
+        gen_goto_tb(s, 0, next_eip);
 
         gen_set_label(l1);
-        gen_op_goto_tb1(TBPARAM(tb));
-        gen_jmp_im(val);
-        gen_op_movl_T0_im((long)tb + 1);
-        gen_op_exit_tb();
+        gen_goto_tb(s, 1, val);
 
         s->is_jmp = 3;
     } else {
@@ -1946,10 +2027,14 @@ static void gen_push_T0(DisasContext *s)
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        /* XXX: check 16 bit behaviour */
         gen_op_movq_A0_reg[R_ESP]();
-        gen_op_subq_A0_8();
-        gen_op_st_T0_A0[OT_QUAD + s->mem_index]();
+        if (s->dflag) {
+            gen_op_subq_A0_8();
+            gen_op_st_T0_A0[OT_QUAD + s->mem_index]();
+        } else {
+            gen_op_subq_A0_2();
+            gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+        }
         gen_op_movq_ESP_A0();
     } else 
 #endif
@@ -1983,10 +2068,14 @@ static void gen_push_T1(DisasContext *s)
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        /* XXX: check 16 bit behaviour */
         gen_op_movq_A0_reg[R_ESP]();
-        gen_op_subq_A0_8();
-        gen_op_st_T1_A0[OT_QUAD + s->mem_index]();
+        if (s->dflag) {
+            gen_op_subq_A0_8();
+            gen_op_st_T1_A0[OT_QUAD + s->mem_index]();
+        } else {
+            gen_op_subq_A0_2();
+            gen_op_st_T1_A0[OT_WORD + s->mem_index](); /* the pushed value is in T1, not T0 */
+        }
         gen_op_movq_ESP_A0();
     } else 
 #endif
@@ -2018,9 +2107,8 @@ static void gen_pop_T0(DisasContext *s)
 {
 #ifdef TARGET_X86_64
     if (CODE64(s)) {
-        /* XXX: check 16 bit behaviour */
         gen_op_movq_A0_reg[R_ESP]();
-        gen_op_ld_T0_A0[OT_QUAD + s->mem_index]();
+        gen_op_ld_T0_A0[(s->dflag ? OT_QUAD : OT_WORD) + s->mem_index]();
     } else 
 #endif
     {
@@ -2039,7 +2127,7 @@ static void gen_pop_T0(DisasContext *s)
 static void gen_pop_update(DisasContext *s)
 {
 #ifdef TARGET_X86_64
-    if (CODE64(s)) {
+    if (CODE64(s) && s->dflag) {
         gen_stack_update(s, 8);
     } else
 #endif
@@ -2074,7 +2162,7 @@ static void gen_pusha(DisasContext *s)
         gen_op_st_T0_A0[OT_WORD + s->dflag + s->mem_index]();
         gen_op_addl_A0_im(2 <<  s->dflag);
     }
-    gen_op_mov_reg_T1[OT_WORD + s->dflag][R_ESP]();
+    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
@@ -2096,33 +2184,55 @@ static void gen_popa(DisasContext *s)
         }
         gen_op_addl_A0_im(2 <<  s->dflag);
     }
-    gen_op_mov_reg_T1[OT_WORD + s->dflag][R_ESP]();
+    gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
 }
 
 static void gen_enter(DisasContext *s, int esp_addend, int level)
 {
     int ot, opsize;
 
-    ot = s->dflag + OT_WORD;
     level &= 0x1f;
-    opsize = 2 << s->dflag;
-
-    gen_op_movl_A0_ESP();
-    gen_op_addl_A0_im(-opsize);
-    if (!s->ss32)
-        gen_op_andl_A0_ffff();
-    gen_op_movl_T1_A0();
-    if (s->addseg)
-        gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
-    /* push bp */
-    gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
-    gen_op_st_T0_A0[ot + s->mem_index]();
-    if (level) {
-        gen_op_enter_level(level, s->dflag);
+#ifdef TARGET_X86_64
+    if (CODE64(s)) {
+        ot = s->dflag ? OT_QUAD : OT_WORD;
+        opsize = 1 << ot;
+        
+        gen_op_movl_A0_ESP();
+        gen_op_addq_A0_im(-opsize);
+        gen_op_movl_T1_A0();
+
+        /* push bp */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_st_T0_A0[ot + s->mem_index]();
+        if (level) {
+            gen_op_enter64_level(level, (ot == OT_QUAD));
+        }
+        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
+        gen_op_mov_reg_T1[OT_QUAD][R_ESP]();
+    } else 
+#endif
+    {
+        ot = s->dflag + OT_WORD;
+        opsize = 2 << s->dflag;
+        
+        gen_op_movl_A0_ESP();
+        gen_op_addl_A0_im(-opsize);
+        if (!s->ss32)
+            gen_op_andl_A0_ffff();
+        gen_op_movl_T1_A0();
+        if (s->addseg)
+            gen_op_addl_A0_seg(offsetof(CPUX86State,segs[R_SS].base));
+        /* push bp */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_st_T0_A0[ot + s->mem_index]();
+        if (level) {
+            gen_op_enter_level(level, s->dflag);
+        }
+        gen_op_mov_reg_T1[ot][R_EBP]();
+        gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
+        gen_op_mov_reg_T1[OT_WORD + s->ss32][R_ESP]();
     }
-    gen_op_mov_reg_T1[ot][R_EBP]();
-    gen_op_addl_T1_im( -esp_addend + (-opsize * level) );
-    gen_op_mov_reg_T1[ot][R_ESP]();
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -2135,7 +2245,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 }
 
 /* an interrupt is different from an exception because of the
-   priviledge checks */
+   privilege checks */
 static void gen_interrupt(DisasContext *s, int intno, 
                           target_ulong cur_eip, target_ulong next_eip)
 {
@@ -2179,18 +2289,12 @@ static void gen_eob(DisasContext *s)
    direct call to the next block may occur */
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
-    TranslationBlock *tb = s->tb;
-
     if (s->jmp_opt) {
-        if (s->cc_op != CC_OP_DYNAMIC)
+        if (s->cc_op != CC_OP_DYNAMIC) {
             gen_op_set_cc_op(s->cc_op);
-        if (tb_num)
-            gen_op_goto_tb1(TBPARAM(tb));
-        else
-            gen_op_goto_tb0(TBPARAM(tb));
-        gen_jmp_im(eip);
-        gen_op_movl_T0_im((long)tb + tb_num);
-        gen_op_exit_tb();
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_goto_tb(s, tb_num, eip);
         s->is_jmp = 3;
     } else {
         gen_jmp_im(eip);
@@ -2216,6 +2320,29 @@ static void gen_movtl_T0_im(target_ulong val)
 #endif
 }
 
+static void gen_movtl_T1_im(target_ulong val)
+{
+#ifdef TARGET_X86_64    
+    if ((int32_t)val == val) {
+        gen_op_movl_T1_im(val);
+    } else {
+        gen_op_movq_T1_im64(val >> 32, val);
+    }
+#else
+    gen_op_movl_T1_im(val);
+#endif
+}
+
+static void gen_add_A0_im(DisasContext *s, int val)
+{
+#ifdef TARGET_X86_64
+    if (CODE64(s))
+        gen_op_addq_A0_im(val);
+    else
+#endif
+        gen_op_addl_A0_im(val);
+}
+
 static GenOpFunc1 *gen_ldq_env_A0[3] = {
     gen_op_ldq_raw_env_A0,
 #ifndef CONFIG_USER_ONLY
@@ -2258,7 +2385,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
     /* pure SSE operations */
     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
-    [0x12] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
+    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
     [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
     [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
@@ -2360,7 +2487,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
     [0xed] = MMX_OP2(paddsw),
     [0xee] = MMX_OP2(pmaxsw),
     [0xef] = MMX_OP2(pxor),
-    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu (PNI) */
+    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
     [0xf1] = MMX_OP2(psllw),
     [0xf2] = MMX_OP2(pslld),
     [0xf3] = MMX_OP2(psllq),
@@ -2487,19 +2614,35 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x1e7: /* movntdq */
         case 0x02b: /* movntps */
         case 0x12b: /* movntps */
-        case 0x2f0: /* lddqu */
-            if (mod == 3) 
+        case 0x3f0: /* lddqu */
+            if (mod == 3)
                 goto illegal_op;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
             break;
         case 0x6e: /* movd mm, ea */
-            gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-            gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_op_movq_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+            } else 
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_op_movl_mm_T0_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+            }
             break;
         case 0x16e: /* movd xmm, ea */
-            gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-            gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_op_movq_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+            } else 
+#endif
+            {
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_op_movl_mm_T0_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+            }
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
@@ -2566,6 +2709,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                             offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
             }
             break;
+        case 0x212: /* movsldup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+            break;
+        case 0x312: /* movddup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            break;
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
@@ -2595,12 +2766,28 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                         offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
             break;
         case 0x7e: /* movd ea, mm */
-            gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
-            gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_op_movq_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else 
+#endif
+            {
+                gen_op_movl_T0_mm_mmx(offsetof(CPUX86State,fpregs[reg].mmx));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
             break;
         case 0x17e: /* movd ea, xmm */
-            gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
-            gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+#ifdef TARGET_X86_64
+            if (s->dflag == 2) {
+                gen_op_movq_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+            } else 
+#endif
+            {
+                gen_op_movl_T0_mm_xmm(offsetof(CPUX86State,xmm_regs[reg]));
+                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+            }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
@@ -2710,14 +2897,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             sse_op2(op2_offset, op1_offset);
             break;
         case 0x050: /* movmskps */
-            gen_op_movmskps(offsetof(CPUX86State,xmm_regs[reg]));
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_T0[OT_LONG][rm]();
+            gen_op_movmskps(offsetof(CPUX86State,xmm_regs[rm]));
+            gen_op_mov_reg_T0[OT_LONG][reg]();
             break;
         case 0x150: /* movmskpd */
-            gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[reg]));
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_T0[OT_LONG][rm]();
+            gen_op_movmskpd(offsetof(CPUX86State,xmm_regs[rm]));
+            gen_op_mov_reg_T0[OT_LONG][reg]();
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
@@ -2782,13 +2969,26 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x22d: /* cvtss2si */
         case 0x32d: /* cvtsd2si */
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if ((b >> 8) & 1) {
+                    gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+                } else {
+                    gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
+                    gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                }
+                op2_offset = offsetof(CPUX86State,xmm_t0);
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
+            }
             sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 + 
-                          (b & 1) * 4](op1_offset);
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+                          (b & 1) * 4](op2_offset);
+            gen_op_mov_reg_T0[ot][reg]();
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4: 
+            s->rip_offset = 1;
             gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
             val = ldub_code(s->pc++);
             if (b1) {
@@ -2829,16 +3029,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x2d6: /* movq2dq */
             gen_op_enter_mmx();
-            rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
-                        offsetof(CPUX86State,fpregs[reg & 7].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            rm = (modrm & 7);
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                        offsetof(CPUX86State,fpregs[rm].mmx));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_op_enter_mmx();
-            rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            rm = (modrm & 7) | REX_B(s);
+            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
+                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
         case 0x1d7:
@@ -2859,12 +3059,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         }
     } else {
         /* generic MMX or SSE operation */
-        if (b == 0xf7) {
+        switch(b) {
+        case 0xf7:
             /* maskmov : we must prepare A0 */
             if (mod != 3) 
                 goto illegal_op;
 #ifdef TARGET_X86_64
-            if (CODE64(s)) {
+            if (s->aflag == 2) {
                 gen_op_movq_A0_reg[R_EDI]();
             } else 
 #endif
@@ -2874,13 +3075,21 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                     gen_op_andl_A0_ffff();
             }
             gen_add_A0_ds_seg(s);
+            break;
+        case 0x70: /* pshufx insn */
+        case 0xc6: /* shufps/shufpd insn */
+        case 0xc2: /* compare insns */
+            s->rip_offset = 1;
+            break;
+        default:
+            break;
         }
         if (is_xmm) {
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
-                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
+                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                 b == 0xc2)) {
                     /* specific case for SSE single instructions */
                     if (b1 == 2) {
@@ -3355,9 +3564,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         }
         if (CODE64(s)) {
-            if (op >= 2 && op <= 5) {
+            if (op == 2 || op == 4) {
                 /* operand size for jumps is 64 bit */
                 ot = OT_QUAD;
+            } else if (op == 3 || op == 5) {
+                /* for far calls and far jumps (lcall/ljmp), the operand
+                   is 16 or 32 bit, even in long mode */
+                ot = dflag ? OT_LONG : OT_WORD;
             } else if (op == 6) {
                 /* default push size is 64 bit */
                 ot = dflag ? OT_QUAD : OT_WORD;
@@ -3391,21 +3604,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->dflag == 0)
                 gen_op_andl_T0_ffff();
             next_eip = s->pc - s->cs_base;
-            gen_op_movl_T1_im(next_eip);
+            gen_movtl_T1_im(next_eip);
             gen_push_T1(s);
             gen_op_jmp_T0();
             gen_eob(s);
             break;
         case 3: /* lcall Ev */
             gen_op_ld_T1_A0[ot + s->mem_index]();
-            gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
             gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         do_lcall:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_op_lcall_protected_T0_T1(dflag, s->pc - s->cs_base);
+                gen_op_lcall_protected_T0_T1(dflag, s->pc - pc_start);
             } else {
                 gen_op_lcall_real_T0_T1(dflag, s->pc - s->cs_base);
             }
@@ -3419,14 +3632,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             break;
         case 5: /* ljmp Ev */
             gen_op_ld_T1_A0[ot + s->mem_index]();
-            gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
+            gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
             gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         do_ljmp:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_op_ljmp_protected_T0_T1(s->pc - s->cs_base);
+                gen_op_ljmp_protected_T0_T1(s->pc - pc_start);
             } else {
                 gen_op_movl_seg_T0_vm(offsetof(CPUX86State,segs[R_CS]));
                 gen_op_movl_T0_T1();
@@ -3584,6 +3797,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
+        gen_jmp_im(pc_start - s->cs_base);
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
@@ -3656,7 +3870,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
     case 0xc8: /* enter */
         {
-            /* XXX: long mode support */
             int level;
             val = lduw_code(s->pc);
             s->pc += 2;
@@ -3666,7 +3879,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
     case 0xc9: /* leave */
         /* XXX: exception not precise (ESP is updated before potential exception) */
-        /* XXX: may be invalid for 16 bit in long mode */
         if (CODE64(s)) {
             gen_op_mov_TN_reg[OT_QUAD][0][R_EBP]();
             gen_op_mov_reg_T0[OT_QUAD][R_ESP]();
@@ -3885,7 +4097,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             else
                 ot = dflag + OT_WORD;
 #ifdef TARGET_X86_64
-            if (CODE64(s)) {
+            if (s->aflag == 2) {
                 offset_addr = ldq_code(s->pc);
                 s->pc += 8;
                 if (offset_addr == (int32_t)offset_addr)
@@ -3914,7 +4126,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
     case 0xd7: /* xlat */
 #ifdef TARGET_X86_64
-        if (CODE64(s)) {
+        if (s->aflag == 2) {
             gen_op_movq_A0_reg[R_EBX]();
             gen_op_addq_A0_AL();
         } else 
@@ -4016,7 +4228,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
         gen_op_ld_T1_A0[ot + s->mem_index]();
-        gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
+        gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
         /* load the segment first to handle exceptions properly */
         gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         gen_movl_seg_T0(s, op, pc_start - s->cs_base);
@@ -4188,16 +4400,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x08: /* flds */
             case 0x0a: /* fsts */
             case 0x0b: /* fstps */
-            case 0x18: /* fildl */
-            case 0x1a: /* fistl */
-            case 0x1b: /* fistpl */
-            case 0x28: /* fldl */
-            case 0x2a: /* fstl */
-            case 0x2b: /* fstpl */
-            case 0x38: /* filds */
-            case 0x3a: /* fists */
-            case 0x3b: /* fistps */
-                
+            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
                 switch(op & 7) {
                 case 0:
                     switch(op >> 4) {
@@ -4216,6 +4421,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         break;
                     }
                     break;
+                case 1:
+                    switch(op >> 4) {
+                    case 1:
+                        gen_op_fisttl_ST0_A0();
+                        break;
+                    case 2:
+                        gen_op_fisttll_ST0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fistt_ST0_A0();
+                    }
+                    gen_op_fpop();
+                    break;
                 default:
                     switch(op >> 4) {
                     case 0:
@@ -4738,6 +4957,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         val = ldsw_code(s->pc);
         s->pc += 2;
         gen_pop_T0(s);
+        if (CODE64(s) && s->dflag)
+            s->dflag = 2;
         gen_stack_update(s, val + (2 << s->dflag));
         if (s->dflag == 0)
             gen_op_andl_T0_ffff();
@@ -4832,7 +5053,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_op_movl_T1_imu(offset);
         }
         goto do_lcall;
-    case 0xe9: /* jmp */
+    case 0xe9: /* jmp im */
         if (dflag)
             tval = (int32_t)insn_get(s, OT_LONG);
         else
@@ -4993,7 +5214,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag + OT_WORD;
         modrm = ldub_code(s->pc++);
-        op = ((modrm >> 3) & 7) | rex_r;
+        op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
@@ -5106,8 +5327,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (CODE64(s))
             goto illegal_op;
         val = ldub_code(s->pc++);
-        gen_op_aam(val);
-        s->cc_op = CC_OP_LOGICB;
+        if (val == 0) {
+            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
+        } else {
+            gen_op_aam(val);
+            s->cc_op = CC_OP_LOGICB;
+        }
         break;
     case 0xd5: /* aad */
         if (CODE64(s))
@@ -5155,7 +5380,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_op_into(s->pc - pc_start);
         break;
     case 0xf1: /* icebp (undocumented, exits to external debugger) */
+#if 1
         gen_debug(s, pc_start - s->cs_base);
+#else
+        /* start debug */
+        tb_flush(cpu_single_env);
+        cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
+#endif
         break;
     case 0xfa: /* cli */
         if (!s->vm86) {
@@ -5258,6 +5489,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_op_jz_ecx[s->aflag](l1);
             } else {
                 gen_op_dec_ECX[s->aflag]();
+                if (b <= 1)
+                    gen_op_mov_T0_cc();
                 gen_op_loop[s->aflag][b](l1);
             }
 
@@ -5281,6 +5514,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x131: /* rdtsc */
+        gen_jmp_im(pc_start - s->cs_base);
         gen_op_rdtsc();
         break;
     case 0x134: /* sysenter */
@@ -5334,6 +5568,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             }
             gen_jmp_im(pc_start - s->cs_base);
             gen_op_sysret(s->dflag);
+            /* condition codes are modified only in long mode */
+            if (s->lma)
+                s->cc_op = CC_OP_EFLAGS;
             gen_eob(s);
         }
         break;
@@ -5418,31 +5655,69 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
+        rm = modrm & 7;
         switch(op) {
         case 0: /* sgdt */
-        case 1: /* sidt */
             if (mod == 3)
                 goto illegal_op;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            if (op == 0)
-                gen_op_movl_T0_env(offsetof(CPUX86State,gdt.limit));
-            else
-                gen_op_movl_T0_env(offsetof(CPUX86State,idt.limit));
+            gen_op_movl_T0_env(offsetof(CPUX86State, gdt.limit));
             gen_op_st_T0_A0[OT_WORD + s->mem_index]();
-#ifdef TARGET_X86_64
-            if (CODE64(s)) 
-                gen_op_addq_A0_im(2);
-            else
-#endif
-                gen_op_addl_A0_im(2);
-            if (op == 0)
-                gen_op_movtl_T0_env(offsetof(CPUX86State,gdt.base));
-            else
-                gen_op_movtl_T0_env(offsetof(CPUX86State,idt.base));
+            gen_add_A0_im(s, 2);
+            gen_op_movtl_T0_env(offsetof(CPUX86State, gdt.base));
             if (!s->dflag)
                 gen_op_andl_T0_im(0xffffff);
             gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
             break;
+        case 1:
+            if (mod == 3) {
+                switch (rm) {
+                case 0: /* monitor */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    gen_jmp_im(pc_start - s->cs_base);
+#ifdef TARGET_X86_64
+                    if (s->aflag == 2) {
+                        gen_op_movq_A0_reg[R_EBX]();
+                        gen_op_addq_A0_AL();
+                    } else 
+#endif
+                    {
+                        gen_op_movl_A0_reg[R_EBX]();
+                        gen_op_addl_A0_AL();
+                        if (s->aflag == 0)
+                            gen_op_andl_A0_ffff();
+                    }
+                    gen_add_A0_ds_seg(s);
+                    gen_op_monitor();
+                    break;
+                case 1: /* mwait */
+                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
+                        s->cpl != 0)
+                        goto illegal_op;
+                    if (s->cc_op != CC_OP_DYNAMIC) {
+                        gen_op_set_cc_op(s->cc_op);
+                        s->cc_op = CC_OP_DYNAMIC;
+                    }
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_op_mwait();
+                    gen_eob(s);
+                    break;
+                default:
+                    goto illegal_op;
+                }
+            } else { /* sidt */
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_movl_T0_env(offsetof(CPUX86State, idt.limit));
+                gen_op_st_T0_A0[OT_WORD + s->mem_index]();
+                gen_add_A0_im(s, 2);
+                gen_op_movtl_T0_env(offsetof(CPUX86State, idt.base));
+                if (!s->dflag)
+                    gen_op_andl_T0_im(0xffffff);
+                gen_op_st_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
+            }
+            break;
         case 2: /* lgdt */
         case 3: /* lidt */
             if (mod == 3)
@@ -5452,12 +5727,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             } else {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T1_A0[OT_WORD + s->mem_index]();
-#ifdef TARGET_X86_64
-                if (CODE64(s))
-                    gen_op_addq_A0_im(2);
-                else
-#endif
-                    gen_op_addl_A0_im(2);
+                gen_add_A0_im(s, 2);
                 gen_op_ld_T0_A0[CODE64(s) + OT_LONG + s->mem_index]();
                 if (!s->dflag)
                     gen_op_andl_T0_im(0xffffff);
@@ -5490,7 +5760,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             } else {
                 if (mod == 3) {
 #ifdef TARGET_X86_64
-                    if (CODE64(s) && (modrm & 7) == 0) {
+                    if (CODE64(s) && rm == 0) {
                         /* swapgs */
                         gen_op_movtl_T0_env(offsetof(CPUX86State,segs[R_GS].base));
                         gen_op_movtl_T1_env(offsetof(CPUX86State,kernelgsbase));
@@ -5608,10 +5878,15 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             /* nothing more to do */
             break;
-        default:
-            goto illegal_op;
+        default: /* nop (multi byte) */
+            gen_nop_modrm(s, modrm);
+            break;
         }
         break;
+    case 0x119 ... 0x11f: /* nop (multi byte) */
+        modrm = ldub_code(s->pc++);
+        gen_nop_modrm(s, modrm);
+        break;
     case 0x120: /* mov reg, crN */
     case 0x122: /* mov crN, reg */
         if (s->cpl != 0) {
@@ -5709,14 +5984,24 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         op = (modrm >> 3) & 7;
         switch(op) {
         case 0: /* fxsave */
-            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR))
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || 
+                (s->flags & HF_EM_MASK))
                 goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_op_fxsave_A0((s->dflag == 2));
             break;
         case 1: /* fxrstor */
-            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR))
+            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) || 
+                (s->flags & HF_EM_MASK))
                 goto illegal_op;
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_op_fxrstor_A0((s->dflag == 2));
             break;
@@ -5740,14 +6025,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             break;
         case 5: /* lfence */
         case 6: /* mfence */
-        case 7: /* sfence */
             if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
                 goto illegal_op;
             break;
+        case 7: /* sfence / clflush */
+            if ((modrm & 0xc7) == 0xc0) {
+                /* sfence */
+                if (!(s->cpuid_features & CPUID_SSE))
+                    goto illegal_op;
+            } else {
+                /* clflush */
+                if (!(s->cpuid_features & CPUID_CLFLUSH))
+                    goto illegal_op;
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            }
+            break;
         default:
             goto illegal_op;
         }
         break;
+    case 0x10d: /* prefetch */
+        modrm = ldub_code(s->pc++);
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        /* ignore for now */
+        break;
+    case 0x1aa: /* rsm */
+        if (!(s->flags & HF_SMM_MASK))
+            goto illegal_op;
+        if (s->cc_op != CC_OP_DYNAMIC) {
+            gen_op_set_cc_op(s->cc_op);
+            s->cc_op = CC_OP_DYNAMIC;
+        }
+        gen_jmp_im(s->pc - s->cs_base);
+        gen_op_rsm();
+        gen_eob(s);
+        break;
     case 0x110 ... 0x117:
     case 0x128 ... 0x12f:
     case 0x150 ... 0x177:
@@ -5983,6 +6295,8 @@ static uint16_t opc_write_flags[NB_OPS] = {
     [INDEX_op_cmpxchg8b] = CC_Z,
     [INDEX_op_lar] = CC_Z,
     [INDEX_op_lsl] = CC_Z,
+    [INDEX_op_verr] = CC_Z,
+    [INDEX_op_verw] = CC_Z,
     [INDEX_op_fcomi_ST0_FT0] = CC_Z | CC_P | CC_C,
     [INDEX_op_fucomi_ST0_FT0] = CC_Z | CC_P | CC_C,
 
@@ -6121,7 +6435,7 @@ static void optimize_flags(uint16_t *opc_buf, int opc_buf_len)
 
     opc_ptr = opc_buf + opc_buf_len;
     /* live_flags contains the flags needed by the next instructions
-       in the code. At the end of the bloc, we consider that all the
+       in the code. At the end of the block, we consider that all the
        flags are live. */
     live_flags = CC_OSZAPC;
     while (opc_ptr > opc_buf) {
@@ -6181,6 +6495,7 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             dc->mem_index = 1 * 4;
     }
     dc->cpuid_features = env->cpuid_features;
+    dc->cpuid_ext_features = env->cpuid_ext_features;
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;