target-alpha: Fix load-locked/store-conditional.

[qemu.git] / target-alpha / op_helper.c
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c

index cdfcd95c21c249fe96fb4cbcbd8aaa9160355901..bfc095cc8a4004b1539241236881dbe28230aa89 100644 (file)
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -14,8 +14,7 @@
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
   */
  
  #include "exec.h"
@@ -23,117 +22,29 @@
  #include "softfloat.h"
  #include "helper.h"
  
-void helper_tb_flush (void)
-{
-    tlb_flush(env, 1);
-}
-
  /*****************************************************************************/
  /* Exceptions processing helpers */
-void helper_excp (int excp, int error)
+void QEMU_NORETURN helper_excp (int excp, int error)
  {
      env->exception_index = excp;
      env->error_code = error;
      cpu_loop_exit();
  }
  
-uint64_t helper_amask (uint64_t arg)
-{
-    switch (env->implver) {
-    case IMPLVER_2106x:
-        /* EV4, EV45, LCA, LCA45 & EV5 */
-        break;
-    case IMPLVER_21164:
-    case IMPLVER_21264:
-    case IMPLVER_21364:
-        arg &= ~env->amask;
-        break;
-    }
-    return arg;
-}
-
  uint64_t helper_load_pcc (void)
  {
      /* XXX: TODO */
      return 0;
  }
  
-uint64_t helper_load_implver (void)
-{
-    return env->implver;
-}
-
  uint64_t helper_load_fpcr (void)
  {
-    uint64_t ret = 0;
-#ifdef CONFIG_SOFTFLOAT
-    ret |= env->fp_status.float_exception_flags << 52;
-    if (env->fp_status.float_exception_flags)
-        ret |= 1ULL << 63;
-    env->ipr[IPR_EXC_SUM] &= ~0x3E:
-    env->ipr[IPR_EXC_SUM] |= env->fp_status.float_exception_flags << 1;
-#endif
-    switch (env->fp_status.float_rounding_mode) {
-    case float_round_nearest_even:
-        ret |= 2ULL << 58;
-        break;
-    case float_round_down:
-        ret |= 1ULL << 58;
-        break;
-    case float_round_up:
-        ret |= 3ULL << 58;
-        break;
-    case float_round_to_zero:
-        break;
-    }
-    return ret;
+    return cpu_alpha_load_fpcr (env);
  }
  
  void helper_store_fpcr (uint64_t val)
  {
-#ifdef CONFIG_SOFTFLOAT
-    set_float_exception_flags((val >> 52) & 0x3F, &FP_STATUS);
-#endif
-    switch ((val >> 58) & 3) {
-    case 0:
-        set_float_rounding_mode(float_round_to_zero, &FP_STATUS);
-        break;
-    case 1:
-        set_float_rounding_mode(float_round_down, &FP_STATUS);
-        break;
-    case 2:
-        set_float_rounding_mode(float_round_nearest_even, &FP_STATUS);
-        break;
-    case 3:
-        set_float_rounding_mode(float_round_up, &FP_STATUS);
-        break;
-    }
-}
-
-spinlock_t intr_cpu_lock = SPIN_LOCK_UNLOCKED;
-
-uint64_t helper_rs(void)
-{
-    uint64_t tmp;
-
-    spin_lock(&intr_cpu_lock);
-    tmp = env->intr_flag;
-    env->intr_flag = 1;
-    spin_unlock(&intr_cpu_lock);
-
-    return tmp;
-}
-
-uint64_t helper_rc(void)
-{
-    uint64_t tmp;
-
-    spin_lock(&intr_cpu_lock);
-    tmp = env->intr_flag;
-    env->intr_flag = 0;
-    spin_unlock(&intr_cpu_lock);
-
-    return tmp;
+    cpu_alpha_store_fpcr (env, val);
  }
  
  uint64_t helper_addqv (uint64_t op1, uint64_t op2)
@@ -141,7 +52,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2)
      uint64_t tmp = op1;
      op1 += op2;
      if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
      return op1;
  }
@@ -151,29 +62,29 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2)
      uint64_t tmp = op1;
      op1 = (uint32_t)(op1 + op2);
      if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
      return op1;
  }
  
  uint64_t helper_subqv (uint64_t op1, uint64_t op2)
  {
-    uint64_t tmp = op1;
-    op1 -= op2;
-    if (unlikely(((~tmp) ^ op1 ^ (-1ULL)) & ((~tmp) ^ op2) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+    uint64_t res;
+    res = op1 - op2;
+    if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
-    return op1;
+    return res;
  }
  
  uint64_t helper_sublv (uint64_t op1, uint64_t op2)
  {
-    uint64_t tmp = op1;
-    op1 = (uint32_t)(op1 - op2);
-    if (unlikely(((~tmp) ^ op1 ^ (-1UL)) & ((~tmp) ^ op2) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+    uint32_t res;
+    res = op1 - op2;
+    if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
-    return op1;
+    return res;
  }
  
  uint64_t helper_mullv (uint64_t op1, uint64_t op2)
@@ -181,7 +92,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2)
      int64_t res = (int64_t)op1 * (int64_t)op2;
  
      if (unlikely((int32_t)res != res)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
      return (int64_t)((int32_t)res);
  }
@@ -193,7 +104,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
      muls64(&tl, &th, op1, op2);
      /* If th != 0 && th != -1, then we had an overflow */
      if (unlikely((th + 1) > 1)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
      }
      return tl;
  }
@@ -221,7 +132,7 @@ uint64_t helper_cttz (uint64_t arg)
      return ctz64(arg);
  }
  
-static always_inline uint64_t byte_zap (uint64_t op, uint8_t mskb)
+static inline uint64_t byte_zap(uint64_t op, uint8_t mskb)
  {
      uint64_t mask;
  
@@ -238,120 +149,335 @@ static always_inline uint64_t byte_zap (uint64_t op, uint8_t mskb)
      return op & ~mask;
  }
  
-uint64_t helper_mskbl(uint64_t val, uint64_t mask)
+uint64_t helper_zap(uint64_t val, uint64_t mask)
+{
+    return byte_zap(val, mask);
+}
+
+uint64_t helper_zapnot(uint64_t val, uint64_t mask)
+{
+    return byte_zap(val, ~mask);
+}
+
+uint64_t helper_cmpbge (uint64_t op1, uint64_t op2)
+{
+    uint8_t opa, opb, res;
+    int i;
+
+    res = 0;
+    for (i = 0; i < 8; i++) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        if (opa >= opb)
+            res |= 1 << i;
+    }
+    return res;
+}
+
+uint64_t helper_minub8 (uint64_t op1, uint64_t op2)
+{
+    uint64_t res = 0;
+    uint8_t opa, opb, opr;
+    int i;
+
+    for (i = 0; i < 8; ++i) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        opr = opa < opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 8);
+    }
+    return res;
+}
+
+uint64_t helper_minsb8 (uint64_t op1, uint64_t op2)
  {
-    return byte_zap(val, 0x01 << (mask & 7));
+    uint64_t res = 0;
+    int8_t opa, opb;
+    uint8_t opr;
+    int i;
+
+    for (i = 0; i < 8; ++i) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        opr = opa < opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 8);
+    }
+    return res;
  }
  
-uint64_t helper_insbl(uint64_t val, uint64_t mask)
+uint64_t helper_minuw4 (uint64_t op1, uint64_t op2)
  {
-    val <<= (mask & 7) * 8;
-    return byte_zap(val, ~(0x01 << (mask & 7)));
+    uint64_t res = 0;
+    uint16_t opa, opb, opr;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        opa = op1 >> (i * 16);
+        opb = op2 >> (i * 16);
+        opr = opa < opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 16);
+    }
+    return res;
  }
  
-uint64_t helper_mskwl(uint64_t val, uint64_t mask)
+uint64_t helper_minsw4 (uint64_t op1, uint64_t op2)
  {
-    return byte_zap(val, 0x03 << (mask & 7));
+    uint64_t res = 0;
+    int16_t opa, opb;
+    uint16_t opr;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        opa = op1 >> (i * 16);
+        opb = op2 >> (i * 16);
+        opr = opa < opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 16);
+    }
+    return res;
  }
  
-uint64_t helper_inswl(uint64_t val, uint64_t mask)
+uint64_t helper_maxub8 (uint64_t op1, uint64_t op2)
  {
-    val <<= (mask & 7) * 8;
-    return byte_zap(val, ~(0x03 << (mask & 7)));
+    uint64_t res = 0;
+    uint8_t opa, opb, opr;
+    int i;
+
+    for (i = 0; i < 8; ++i) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        opr = opa > opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 8);
+    }
+    return res;
  }
  
-uint64_t helper_mskll(uint64_t val, uint64_t mask)
+uint64_t helper_maxsb8 (uint64_t op1, uint64_t op2)
  {
-    return byte_zap(val, 0x0F << (mask & 7));
+    uint64_t res = 0;
+    int8_t opa, opb;
+    uint8_t opr;
+    int i;
+
+    for (i = 0; i < 8; ++i) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        opr = opa > opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 8);
+    }
+    return res;
  }
  
-uint64_t helper_insll(uint64_t val, uint64_t mask)
+uint64_t helper_maxuw4 (uint64_t op1, uint64_t op2)
  {
-    val <<= (mask & 7) * 8;
-    return byte_zap(val, ~(0x0F << (mask & 7)));
+    uint64_t res = 0;
+    uint16_t opa, opb, opr;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        opa = op1 >> (i * 16);
+        opb = op2 >> (i * 16);
+        opr = opa > opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 16);
+    }
+    return res;
  }
  
-uint64_t helper_zap(uint64_t val, uint64_t mask)
+uint64_t helper_maxsw4 (uint64_t op1, uint64_t op2)
  {
-    return byte_zap(val, mask);
+    uint64_t res = 0;
+    int16_t opa, opb;
+    uint16_t opr;
+    int i;
+
+    for (i = 0; i < 4; ++i) {
+        opa = op1 >> (i * 16);
+        opb = op2 >> (i * 16);
+        opr = opa > opb ? opa : opb;
+        res |= (uint64_t)opr << (i * 16);
+    }
+    return res;
  }
  
-uint64_t helper_zapnot(uint64_t val, uint64_t mask)
+uint64_t helper_perr (uint64_t op1, uint64_t op2)
  {
-    return byte_zap(val, ~mask);
+    uint64_t res = 0;
+    uint8_t opa, opb, opr;
+    int i;
+
+    for (i = 0; i < 8; ++i) {
+        opa = op1 >> (i * 8);
+        opb = op2 >> (i * 8);
+        if (opa >= opb)
+            opr = opa - opb;
+        else
+            opr = opb - opa;
+        res += opr;
+    }
+    return res;
  }
  
-uint64_t helper_mskql(uint64_t val, uint64_t mask)
+uint64_t helper_pklb (uint64_t op1)
  {
-    return byte_zap(val, 0xFF << (mask & 7));
+    return (op1 & 0xff) | ((op1 >> 24) & 0xff00);
  }
  
-uint64_t helper_insql(uint64_t val, uint64_t mask)
+uint64_t helper_pkwb (uint64_t op1)
  {
-    val <<= (mask & 7) * 8;
-    return byte_zap(val, ~(0xFF << (mask & 7)));
+    return ((op1 & 0xff)
+            | ((op1 >> 8) & 0xff00)
+            | ((op1 >> 16) & 0xff0000)
+            | ((op1 >> 24) & 0xff000000));
  }
  
-uint64_t helper_mskwh(uint64_t val, uint64_t mask)
+uint64_t helper_unpkbl (uint64_t op1)
  {
-    return byte_zap(val, (0x03 << (mask & 7)) >> 8);
+    return (op1 & 0xff) | ((op1 & 0xff00) << 24);
  }
  
-uint64_t helper_inswh(uint64_t val, uint64_t mask)
+uint64_t helper_unpkbw (uint64_t op1)
  {
-    val >>= 64 - ((mask & 7) * 8);
-    return byte_zap(val, ~((0x03 << (mask & 7)) >> 8));
+    return ((op1 & 0xff)
+            | ((op1 & 0xff00) << 8)
+            | ((op1 & 0xff0000) << 16)
+            | ((op1 & 0xff000000) << 24));
  }
  
-uint64_t helper_msklh(uint64_t val, uint64_t mask)
+/* Floating point helpers */
+
+void helper_setroundmode (uint32_t val)
  {
-    return byte_zap(val, (0x0F << (mask & 7)) >> 8);
+    set_float_rounding_mode(val, &FP_STATUS);
  }
  
-uint64_t helper_inslh(uint64_t val, uint64_t mask)
+void helper_setflushzero (uint32_t val)
  {
-    val >>= 64 - ((mask & 7) * 8);
-    return byte_zap(val, ~((0x0F << (mask & 7)) >> 8));
+    set_flush_to_zero(val, &FP_STATUS);
  }
  
-uint64_t helper_mskqh(uint64_t val, uint64_t mask)
+void helper_fp_exc_clear (void)
  {
-    return byte_zap(val, (0xFF << (mask & 7)) >> 8);
+    set_float_exception_flags(0, &FP_STATUS);
  }
  
-uint64_t helper_insqh(uint64_t val, uint64_t mask)
+uint32_t helper_fp_exc_get (void)
  {
-    val >>= 64 - ((mask & 7) * 8);
-    return byte_zap(val, ~((0xFF << (mask & 7)) >> 8));
+    return get_float_exception_flags(&FP_STATUS);
  }
  
-uint64_t helper_cmpbge (uint64_t op1, uint64_t op2)
+/* Raise exceptions for ieee fp insns without software completion.
+   In that case there are no exceptions that don't trap; the mask
+   doesn't apply.  */
+void helper_fp_exc_raise(uint32_t exc, uint32_t regno)
  {
-    uint8_t opa, opb, res;
-    int i;
+    if (exc) {
+        uint32_t hw_exc = 0;
  
-    res = 0;
-    for (i = 0; i < 8; i++) {
-        opa = op1 >> (i * 8);
-        opb = op2 >> (i * 8);
-        if (opa >= opb)
-            res |= 1 << i;
+        env->ipr[IPR_EXC_MASK] |= 1ull << regno;
+
+        if (exc & float_flag_invalid) {
+            hw_exc |= EXC_M_INV;
+        }
+        if (exc & float_flag_divbyzero) {
+            hw_exc |= EXC_M_DZE;
+        }
+        if (exc & float_flag_overflow) {
+            hw_exc |= EXC_M_FOV;
+        }
+        if (exc & float_flag_underflow) {
+            hw_exc |= EXC_M_UNF;
+        }
+        if (exc & float_flag_inexact) {
+            hw_exc |= EXC_M_INE;
+        }
+        helper_excp(EXCP_ARITH, hw_exc);
      }
-    return res;
  }
  
-/* Floating point helpers */
+/* Raise exceptions for ieee fp insns with software completion.  */
+void helper_fp_exc_raise_s(uint32_t exc, uint32_t regno)
+{
+    if (exc) {
+        env->fpcr_exc_status |= exc;
+
+        exc &= ~env->fpcr_exc_mask;
+        if (exc) {
+            helper_fp_exc_raise(exc, regno);
+        }
+    }
+}
+
+/* Input remapping without software completion.  Handle denormal-map-to-zero
+   and trap for all other non-finite numbers.  */
+uint64_t helper_ieee_input(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+    return val;
+}
+
+/* Similar, but does not trap for infinities.  Used for comparisons.  */
+uint64_t helper_ieee_input_cmp(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff && frac) {
+        /* NaN.  */
+        helper_excp(EXCP_ARITH, EXC_M_INV);
+    }
+    return val;
+}
+
+/* Input remapping with software completion enabled.  All we have to do
+   is handle denormal-map-to-zero; all other inputs get exceptions as
+   needed from the actual operation.  */
+uint64_t helper_ieee_input_s(uint64_t val)
+{
+    if (env->fpcr_dnz) {
+        uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+        if (exp == 0) {
+            val &= 1ull << 63;
+        }
+    }
+    return val;
+}
  
  /* F floating (VAX) */
-static always_inline uint64_t float32_to_f (float32 fa)
+static inline uint64_t float32_to_f(float32 fa)
  {
-    uint32_t a;
      uint64_t r, exp, mant, sig;
+    CPU_FloatU a;
  
-    a = *(uint32_t*)(&fa);
-    sig = ((uint64_t)a & 0x80000000) << 32;
-    exp = (a >> 23) & 0xff;
-    mant = ((uint64_t)a & 0x007fffff) << 29;
+    a.f = fa;
+    sig = ((uint64_t)a.l & 0x80000000) << 32;
+    exp = (a.l >> 23) & 0xff;
+    mant = ((uint64_t)a.l & 0x007fffff) << 29;
  
      if (exp == 255) {
          /* NaN or infinity */
@@ -376,9 +502,10 @@ static always_inline uint64_t float32_to_f (float32 fa)
      return r;
  }
  
-static always_inline float32 f_to_float32 (uint64_t a)
+static inline float32 f_to_float32(uint64_t a)
  {
-    uint32_t r, exp, mant_sig;
+    uint32_t exp, mant_sig;
+    CPU_FloatU r;
  
      exp = ((a >> 55) & 0x80) | ((a >> 52) & 0x7f);
      mant_sig = ((a >> 32) & 0x80000000) | ((a >> 29) & 0x007fffff);
@@ -390,12 +517,12 @@ static always_inline float32 f_to_float32 (uint64_t a)
  
      if (exp < 3) {
          /* Underflow */
-        r = 0;
+        r.l = 0;
      } else {
-        r = ((exp - 2) << 23) | mant_sig;
+        r.l = ((exp - 2) << 23) | mant_sig;
      }
  
-    return *(float32*)(&a);
+    return r.f;
  }
  
  uint32_t helper_f_to_memory (uint64_t a)
@@ -418,6 +545,9 @@ uint64_t helper_memory_to_f (uint32_t a)
      return r;
  }
  
+/* ??? Emulating VAX arithmetic with IEEE arithmetic is wrong.  We should
+   either implement VAX arithmetic properly or just signal invalid opcode.  */
+
  uint64_t helper_addf (uint64_t a, uint64_t b)
  {
      float32 fa, fb, fr;
@@ -469,14 +599,15 @@ uint64_t helper_sqrtf (uint64_t t)
  
  
  /* G floating (VAX) */
-static always_inline uint64_t float64_to_g (float64 fa)
+static inline uint64_t float64_to_g(float64 fa)
  {
-    uint64_t a, r, exp, mant, sig;
+    uint64_t r, exp, mant, sig;
+    CPU_DoubleU a;
  
-    a = *(uint64_t*)(&fa);
-    sig = a & 0x8000000000000000ull;
-    exp = (a >> 52) & 0x7ff;
-    mant = a & 0x000fffffffffffffull;
+    a.d = fa;
+    sig = a.ll & 0x8000000000000000ull;
+    exp = (a.ll >> 52) & 0x7ff;
+    mant = a.ll & 0x000fffffffffffffull;
  
      if (exp == 2047) {
          /* NaN or infinity */
@@ -501,9 +632,10 @@ static always_inline uint64_t float64_to_g (float64 fa)
      return r;
  }
  
-static always_inline float64 g_to_float64 (uint64_t a)
+static inline float64 g_to_float64(uint64_t a)
  {
-    uint64_t r, exp, mant_sig;
+    uint64_t exp, mant_sig;
+    CPU_DoubleU r;
  
      exp = (a >> 52) & 0x7ff;
      mant_sig = a & 0x800fffffffffffffull;
@@ -515,12 +647,12 @@ static always_inline float64 g_to_float64 (uint64_t a)
  
      if (exp < 3) {
          /* Underflow */
-        r = 0;
+        r.ll = 0;
      } else {
-        r = ((exp - 2) << 52) | mant_sig;
+        r.ll = ((exp - 2) << 52) | mant_sig;
      }
  
-    return *(float64*)(&a);
+    return r.d;
  }
  
  uint64_t helper_g_to_memory (uint64_t a)
@@ -594,36 +726,57 @@ uint64_t helper_sqrtg (uint64_t a)
  
  
  /* S floating (single) */
-static always_inline uint64_t float32_to_s (float32 fa)
+
+/* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
+static inline uint64_t float32_to_s_int(uint32_t fi)
  {
-    uint32_t a;
-    uint64_t r;
+    uint32_t frac = fi & 0x7fffff;
+    uint32_t sign = fi >> 31;
+    uint32_t exp_msb = (fi >> 30) & 1;
+    uint32_t exp_low = (fi >> 23) & 0x7f;
+    uint32_t exp;
  
-    a = *(uint32_t*)(&fa);
+    exp = (exp_msb << 10) | exp_low;
+    if (exp_msb) {
+        if (exp_low == 0x7f)
+            exp = 0x7ff;
+    } else {
+        if (exp_low != 0x00)
+            exp |= 0x380;
+    }
  
-    r = (((uint64_t)(a & 0xc0000000)) << 32) | (((uint64_t)(a & 0x3fffffff)) << 29);
-    if (((a & 0x7f800000) != 0x7f800000) && (!(a & 0x40000000)))
-        r |= 0x7ll << 59;
-    return r;
+    return (((uint64_t)sign << 63)
+            | ((uint64_t)exp << 52)
+            | ((uint64_t)frac << 29));
+}
+
+static inline uint64_t float32_to_s(float32 fa)
+{
+    CPU_FloatU a;
+    a.f = fa;
+    return float32_to_s_int(a.l);
  }
  
-static always_inline float32 s_to_float32 (uint64_t a)
+static inline uint32_t s_to_float32_int(uint64_t a)
  {
-    uint32_t r = ((a >> 32) & 0xc0000000) | ((a >> 29) & 0x3fffffff);
-    return *(float32*)(&r);
+    return ((a >> 32) & 0xc0000000) | ((a >> 29) & 0x3fffffff);
+}
+
+static inline float32 s_to_float32(uint64_t a)
+{
+    CPU_FloatU r;
+    r.l = s_to_float32_int(a);
+    return r.f;
  }
  
  uint32_t helper_s_to_memory (uint64_t a)
  {
-    /* Memory format is the same as float32 */
-    float32 fa = s_to_float32(a);
-    return *(uint32_t*)(&fa);
+    return s_to_float32_int(a);
  }
  
  uint64_t helper_memory_to_s (uint32_t a)
  {
-    /* Memory format is the same as float32 */
-    return float32_to_s(*(float32*)(&a));
+    return float32_to_s_int(a);
  }
  
  uint64_t helper_adds (uint64_t a, uint64_t b)
@@ -677,16 +830,20 @@ uint64_t helper_sqrts (uint64_t a)
  
  
  /* T floating (double) */
-static always_inline float64 t_to_float64 (uint64_t a)
+static inline float64 t_to_float64(uint64_t a)
  {
      /* Memory format is the same as float64 */
-    return *(float64*)(&a);
+    CPU_DoubleU r;
+    r.ll = a;
+    return r.d;
  }
  
-static always_inline uint64_t float64_to_t (float64 fa)
+static inline uint64_t float64_to_t(float64 fa)
  {
      /* Memory format is the same as float64 */
-    return *(uint64*)(&fa);
+    CPU_DoubleU r;
+    r.d = fa;
+    return r.ll;
  }
  
  uint64_t helper_addt (uint64_t a, uint64_t b)
@@ -738,24 +895,6 @@ uint64_t helper_sqrtt (uint64_t a)
      return float64_to_t(fr);
  }
  
-
-/* Sign copy */
-uint64_t helper_cpys(uint64_t a, uint64_t b)
-{
-    return (a & 0x8000000000000000ULL) | (b & ~0x8000000000000000ULL);
-}
-
-uint64_t helper_cpysn(uint64_t a, uint64_t b)
-{
-    return ((~a) & 0x8000000000000000ULL) | (b & ~0x8000000000000000ULL);
-}
-
-uint64_t helper_cpyse(uint64_t a, uint64_t b)
-{
-    return (a & 0xFFF0000000000000ULL) | (b & ~0xFFF0000000000000ULL);
-}
-
-
  /* Comparisons */
  uint64_t helper_cmptun (uint64_t a, uint64_t b)
  {
@@ -848,37 +987,6 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b)
          return 0;
  }
  
-uint64_t helper_cmpfeq (uint64_t a)
-{
-    return !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfne (uint64_t a)
-{
-    return (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpflt (uint64_t a)
-{
-    return (a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfle (uint64_t a)
-{
-    return (a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfgt (uint64_t a)
-{
-    return !(a & 0x8000000000000000ULL) && (a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-uint64_t helper_cmpfge (uint64_t a)
-{
-    return !(a & 0x8000000000000000ULL) || !(a & 0x7FFFFFFFFFFFFFFFULL);
-}
-
-
  /* Floating point format conversion */
  uint64_t helper_cvtts (uint64_t a)
  {
@@ -906,10 +1014,107 @@ uint64_t helper_cvtqs (uint64_t a)
      return float32_to_s(fr);
  }
  
-uint64_t helper_cvttq (uint64_t a)
+/* Implement float64 to uint64 conversion without saturation -- we must
+   supply the truncated result.  This behaviour is used by the compiler
+   to get unsigned conversion for free with the same instruction.
+
+   The VI flag is set when overflow or inexact exceptions should be raised.  */
+
+static inline uint64_t helper_cvttq_internal(uint64_t a, int roundmode, int VI)
  {
-    float64 fa = t_to_float64(a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign, exc = 0;
+    int shift;
+
+    sign = (a >> 63);
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (unlikely(frac != 0)) {
+            goto do_underflow;
+        }
+    } else if (exp == 0x7ff) {
+        exc = (frac ? float_flag_invalid : VI ? float_flag_overflow : 0);
+    } else {
+        /* Restore implicit bit.  */
+        frac |= 0x10000000000000ull;
+
+        shift = exp - 1023 - 52;
+        if (shift >= 0) {
+            /* In this case the number is so large that we must shift
+               the fraction left.  There is no rounding to do.  */
+            if (shift < 63) {
+                ret = frac << shift;
+                if (VI && (ret >> shift) != frac) {
+                    exc = float_flag_overflow;
+                }
+            }
+        } else {
+            uint64_t round;
+
+            /* In this case the number is smaller than the fraction as
+               represented by the 52 bit number.  Here we must think
+               about rounding the result.  Handle this by shifting the
+               fractional part of the number into the high bits of ROUND.
+               This will let us efficiently handle round-to-nearest.  */
+            shift = -shift;
+            if (shift < 63) {
+                ret = frac >> shift;
+                round = frac << (64 - shift);
+            } else {
+                /* The exponent is so small we shift out everything.
+                   Leave a sticky bit for proper rounding below.  */
+            do_underflow:
+                round = 1;
+            }
+
+            if (round) {
+                exc = (VI ? float_flag_inexact : 0);
+                switch (roundmode) {
+                case float_round_nearest_even:
+                    if (round == (1ull << 63)) {
+                        /* Fraction is exactly 0.5; round to even.  */
+                        ret += (ret & 1);
+                    } else if (round > (1ull << 63)) {
+                        ret += 1;
+                    }
+                    break;
+                case float_round_to_zero:
+                    break;
+                case float_round_up:
+                    ret += 1 - sign;
+                    break;
+                case float_round_down:
+                    ret += sign;
+                    break;
+                }
+            }
+        }
+        if (sign) {
+            ret = -ret;
+        }
+    }
+    if (unlikely(exc)) {
+        float_raise(exc, &FP_STATUS);
+    }
+
+    return ret;
+}
+
+uint64_t helper_cvttq(uint64_t a)
+{
+    return helper_cvttq_internal(a, FP_STATUS.float_rounding_mode, 1);
+}
+
+uint64_t helper_cvttq_c(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 0);
+}
+
+uint64_t helper_cvttq_svic(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 1);
  }
  
  uint64_t helper_cvtqt (uint64_t a)
@@ -947,48 +1152,14 @@ uint64_t helper_cvtqg (uint64_t a)
      return float64_to_g(fr);
  }
  
-uint64_t helper_cvtlq (uint64_t a)
-{
-    return (int64_t)((int32_t)((a >> 32) | ((a >> 29) & 0x3FFFFFFF)));
-}
-
-static always_inline uint64_t __helper_cvtql (uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
-uint64_t helper_cvtql (uint64_t a)
-{
-    return __helper_cvtql(a, 0, 0);
-}
-
-uint64_t helper_cvtqlv (uint64_t a)
-{
-    return __helper_cvtql(a, 0, 1);
-}
-
-uint64_t helper_cvtqlsv (uint64_t a)
-{
-    return __helper_cvtql(a, 1, 1);
-}
-
  /* PALcode support special instructions */
  #if !defined (CONFIG_USER_ONLY)
  void helper_hw_rei (void)
  {
      env->pc = env->ipr[IPR_EXC_ADDR] & ~3;
      env->ipr[IPR_EXC_ADDR] = env->ipr[IPR_EXC_ADDR] & 1;
+    env->intr_flag = 0;
+    env->lock_addr = -1;
      /* XXX: re-enable interrupts and memory mapping */
  }
  
@@ -996,6 +1167,8 @@ void helper_hw_ret (uint64_t a)
  {
      env->pc = a & ~3;
      env->ipr[IPR_EXC_ADDR] = a & 1;
+    env->intr_flag = 0;
+    env->lock_addr = -1;
      /* XXX: re-enable interrupts and memory mapping */
  }