enum {
CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
CC_OP_EFLAGS, /* all cc are explicitely computed, CC_SRC = flags */
- CC_OP_MUL, /* modify all flags, C, O = (CC_SRC != 0) */
+
+ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+ CC_OP_MULW,
+ CC_OP_MULL,
CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
CC_OP_ADDW,
}
/* multiply/divide */
+
+/* XXX: add eflags optimizations */
+/* XXX: add non P4 style flags */
+
void OPPROTO op_mulb_AL_T0(void)
{
unsigned int res;
res = (uint8_t)EAX * (uint8_t)T0;
EAX = (EAX & 0xffff0000) | res;
+ CC_DST = res;
CC_SRC = (res & 0xff00);
}
int res;
res = (int8_t)EAX * (int8_t)T0;
EAX = (EAX & 0xffff0000) | (res & 0xffff);
+ CC_DST = res;
CC_SRC = (res != (int8_t)res);
}
res = (uint16_t)EAX * (uint16_t)T0;
EAX = (EAX & 0xffff0000) | (res & 0xffff);
EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff);
+ CC_DST = res;
CC_SRC = res >> 16;
}
res = (int16_t)EAX * (int16_t)T0;
EAX = (EAX & 0xffff0000) | (res & 0xffff);
EDX = (EDX & 0xffff0000) | ((res >> 16) & 0xffff);
+ CC_DST = res;
CC_SRC = (res != (int16_t)res);
}
res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
EAX = res;
EDX = res >> 32;
+ CC_DST = res;
CC_SRC = res >> 32;
}
res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
EAX = res;
EDX = res >> 32;
+ CC_DST = res;
CC_SRC = (res != (int32_t)res);
}
int res;
res = (int16_t)T0 * (int16_t)T1;
T0 = res;
+ CC_DST = res;
CC_SRC = (res != (int16_t)res);
}
int64_t res;
res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
T0 = res;
+ CC_DST = res;
CC_SRC = (res != (int32_t)res);
}
return CC_SRC & CC_C;
}
-static int compute_c_mul(void)
-{
- int cf;
- cf = (CC_SRC != 0);
- return cf;
-}
-
-static int compute_all_mul(void)
-{
- int cf, pf, af, zf, sf, of;
- cf = (CC_SRC != 0);
- pf = 0; /* undefined */
- af = 0; /* undefined */
- zf = 0; /* undefined */
- sf = 0; /* undefined */
- of = cf << 11;
- return cf | pf | af | zf | sf | of;
-}
-
CCTable cc_table[CC_OP_NB] = {
[CC_OP_DYNAMIC] = { /* should never happen */ },
[CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },
- [CC_OP_MUL] = { compute_all_mul, compute_c_mul },
+ [CC_OP_MULB] = { compute_all_mulb, compute_c_mull },
+ [CC_OP_MULW] = { compute_all_mulw, compute_c_mull },
+ [CC_OP_MULL] = { compute_all_mull, compute_c_mull },
[CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
[CC_OP_ADDW] = { compute_all_addw, compute_c_addw },
return cf | pf | af | zf | sf | of;
}
+#if DATA_BITS == 32
+static int glue(compute_c_mul, SUFFIX)(void)
+{
+ int cf;
+ cf = (CC_SRC != 0);
+ return cf;
+}
+#endif
+
+/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
+ CF are modified and it is slower to do that. */
+static int glue(compute_all_mul, SUFFIX)(void)
+{
+ int cf, pf, af, zf, sf, of;
+ cf = (CC_SRC != 0);
+ pf = parity_table[(uint8_t)CC_DST];
+ af = 0; /* undefined */
+ zf = ((DATA_TYPE)CC_DST == 0) << 6;
+ sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ of = cf << 11;
+ return cf | pf | af | zf | sf | of;
+}
+
/* various optimized jumps cases */
void OPPROTO glue(op_jb_sub, SUFFIX)(void)